diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,28375 @@ +{ + "best_metric": 0.0, + "best_model_checkpoint": "./dpo-lora-output/checkpoint-50", + "epoch": 19.473684210526315, + "eval_steps": 50, + "global_step": 1850, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010526315789473684, + "grad_norm": 3.8484461128973635e-07, + "learning_rate": 0.00019997894736842106, + "logits/chosen": -0.37785547971725464, + "logits/rejected": -0.37785547971725464, + "logps/chosen": -28.69106674194336, + "logps/rejected": -28.69106674194336, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.021052631578947368, + "grad_norm": 4.627001430890232e-07, + "learning_rate": 0.00019995789473684214, + "logits/chosen": -0.6673249006271362, + "logits/rejected": -0.6673249006271362, + "logps/chosen": -28.038116455078125, + "logps/rejected": -28.038116455078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00644683837890625, + "rewards/margins": 0.0, + "rewards/rejected": -0.00644683837890625, + "step": 2 + }, + { + "epoch": 0.031578947368421054, + "grad_norm": 4.5330713760449726e-07, + "learning_rate": 0.00019993684210526318, + "logits/chosen": 0.0750800147652626, + "logits/rejected": 0.0750800147652626, + "logps/chosen": -28.565792083740234, + "logps/rejected": -28.565792083740234, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00939254742115736, + "rewards/margins": 0.0, + "rewards/rejected": -0.00939254742115736, + "step": 3 + }, + { + "epoch": 0.042105263157894736, + "grad_norm": 3.203355163350352e-07, + "learning_rate": 0.0001999157894736842, + "logits/chosen": -0.2510129511356354, + "logits/rejected": -0.2510129511356354, + "logps/chosen": -28.46105194091797, + "logps/rejected": -28.46105194091797, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.018783951178193092, + "rewards/margins": 0.0, + "rewards/rejected": -0.018783951178193092, + "step": 4 + }, + { + "epoch": 0.05263157894736842, + "grad_norm": NaN, + "learning_rate": 0.0001999157894736842, + "logits/chosen": -0.31327107548713684, + "logits/rejected": -0.31327107548713684, + "logps/chosen": -29.54190444946289, + "logps/rejected": -29.54190444946289, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03589210659265518, + "rewards/margins": 0.0, + "rewards/rejected": -0.03589210659265518, + "step": 5 + }, + { + "epoch": 0.06315789473684211, + "grad_norm": 3.0991532184998505e-07, + "learning_rate": 0.00019989473684210526, + "logits/chosen": -0.7301732897758484, + "logits/rejected": -0.7301732897758484, + "logps/chosen": -28.049640655517578, + "logps/rejected": -28.049640655517578, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.023381425067782402, + "rewards/margins": 0.0, + "rewards/rejected": -0.023381425067782402, + "step": 6 + }, + { + "epoch": 0.07368421052631578, + "grad_norm": 5.081353720015613e-07, + "learning_rate": 0.00019987368421052633, + "logits/chosen": -0.15332195162773132, + "logits/rejected": -0.15332195162773132, + "logps/chosen": -29.715986251831055, + "logps/rejected": -29.715986251831055, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03316478803753853, + "rewards/margins": 0.0, + "rewards/rejected": -0.03316478803753853, + "step": 7 + }, + { + "epoch": 0.08421052631578947, + "grad_norm": 4.6199787107070733e-07, + "learning_rate": 0.00019985263157894738, + "logits/chosen": 0.08251968026161194, + "logits/rejected": 0.08251968026161194, + "logps/chosen": -29.024179458618164, + "logps/rejected": -29.024179458618164, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.055231284350156784, + "rewards/margins": 0.0, + "rewards/rejected": -0.055231284350156784, + "step": 8 + }, + { + "epoch": 0.09473684210526316, + "grad_norm": 4.6873628889443353e-07, + "learning_rate": 0.00019983157894736843, + "logits/chosen": 0.0853329673409462, + "logits/rejected": 0.0853329673409462, + "logps/chosen": -29.1766357421875, + "logps/rejected": -29.1766357421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.07047691196203232, + "rewards/margins": 0.0, + "rewards/rejected": -0.07047691196203232, + "step": 9 + }, + { + "epoch": 0.10526315789473684, + "grad_norm": 3.2983530218189117e-07, + "learning_rate": 0.00019981052631578948, + "logits/chosen": -0.24946345388889313, + "logits/rejected": -0.24946345388889313, + "logps/chosen": -28.859678268432617, + "logps/rejected": -28.859678268432617, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.058646585792303085, + "rewards/margins": 0.0, + "rewards/rejected": -0.058646585792303085, + "step": 10 + }, + { + "epoch": 0.11578947368421053, + "grad_norm": 3.8660323298245203e-07, + "learning_rate": 0.00019978947368421053, + "logits/chosen": -0.41132038831710815, + "logits/rejected": -0.41132038831710815, + "logps/chosen": -29.74775505065918, + "logps/rejected": -29.74775505065918, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.10566883534193039, + "rewards/margins": 0.0, + "rewards/rejected": -0.10566883534193039, + "step": 11 + }, + { + "epoch": 0.12631578947368421, + "grad_norm": 4.862479272560449e-07, + "learning_rate": 0.00019976842105263158, + "logits/chosen": 0.0920749232172966, + "logits/rejected": 0.0920749232172966, + "logps/chosen": -29.676916122436523, + "logps/rejected": -29.676916122436523, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.1205049529671669, + "rewards/margins": 0.0, + "rewards/rejected": -0.1205049529671669, + "step": 12 + }, + { + "epoch": 0.1368421052631579, + "grad_norm": 3.2230357760454353e-07, + "learning_rate": 0.00019974736842105263, + "logits/chosen": -0.45576807856559753, + "logits/rejected": -0.45576807856559753, + "logps/chosen": -31.059167861938477, + "logps/rejected": -31.059167861938477, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.08003788441419601, + "rewards/margins": 0.0, + "rewards/rejected": -0.08003788441419601, + "step": 13 + }, + { + "epoch": 0.14736842105263157, + "grad_norm": 5.542100893762836e-07, + "learning_rate": 0.0001997263157894737, + "logits/chosen": -0.12948763370513916, + "logits/rejected": -0.12948763370513916, + "logps/chosen": -30.639995574951172, + "logps/rejected": -30.639995574951172, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.12556572258472443, + "rewards/margins": 0.0, + "rewards/rejected": -0.12556572258472443, + "step": 14 + }, + { + "epoch": 0.15789473684210525, + "grad_norm": 4.4282163003117603e-07, + "learning_rate": 0.00019970526315789475, + "logits/chosen": -0.32608121633529663, + "logits/rejected": -0.32608121633529663, + "logps/chosen": -30.3846378326416, + "logps/rejected": -30.3846378326416, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.12016544491052628, + "rewards/margins": 0.0, + "rewards/rejected": -0.12016544491052628, + "step": 15 + }, + { + "epoch": 0.16842105263157894, + "grad_norm": 5.774842293249094e-07, + "learning_rate": 0.0001996842105263158, + "logits/chosen": -0.12218209356069565, + "logits/rejected": -0.12218209356069565, + "logps/chosen": -30.943218231201172, + "logps/rejected": -30.943218231201172, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.15588799118995667, + "rewards/margins": 0.0, + "rewards/rejected": -0.15588799118995667, + "step": 16 + }, + { + "epoch": 0.17894736842105263, + "grad_norm": 3.617555535129213e-07, + "learning_rate": 0.00019966315789473685, + "logits/chosen": -0.2481478750705719, + "logits/rejected": -0.2481478750705719, + "logps/chosen": -29.472448348999023, + "logps/rejected": -29.472448348999023, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.11992359161376953, + "rewards/margins": 0.0, + "rewards/rejected": -0.11992359161376953, + "step": 17 + }, + { + "epoch": 0.18947368421052632, + "grad_norm": 5.118704962114862e-07, + "learning_rate": 0.0001996421052631579, + "logits/chosen": -0.6967118978500366, + "logits/rejected": -0.6967118978500366, + "logps/chosen": -28.57967758178711, + "logps/rejected": -28.57967758178711, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.13177242875099182, + "rewards/margins": 0.0, + "rewards/rejected": -0.13177242875099182, + "step": 18 + }, + { + "epoch": 0.2, + "grad_norm": 4.311013412916509e-07, + "learning_rate": 0.00019962105263157895, + "logits/chosen": -0.33055102825164795, + "logits/rejected": -0.33055102825164795, + "logps/chosen": -33.06563186645508, + "logps/rejected": -33.06563186645508, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.12168388813734055, + "rewards/margins": 0.0, + "rewards/rejected": -0.12168388813734055, + "step": 19 + }, + { + "epoch": 0.21052631578947367, + "grad_norm": 3.7089651527821843e-07, + "learning_rate": 0.0001996, + "logits/chosen": -0.24710780382156372, + "logits/rejected": -0.24710780382156372, + "logps/chosen": -29.734893798828125, + "logps/rejected": -29.734893798828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.14616814255714417, + "rewards/margins": 0.0, + "rewards/rejected": -0.14616814255714417, + "step": 20 + }, + { + "epoch": 0.22105263157894736, + "grad_norm": 4.842980843022815e-07, + "learning_rate": 0.00019957894736842108, + "logits/chosen": -0.3371769189834595, + "logits/rejected": -0.3371769189834595, + "logps/chosen": -31.09835433959961, + "logps/rejected": -31.09835433959961, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.19153709709644318, + "rewards/margins": 0.0, + "rewards/rejected": -0.19153709709644318, + "step": 21 + }, + { + "epoch": 0.23157894736842105, + "grad_norm": 6.520318152070104e-07, + "learning_rate": 0.00019955789473684213, + "logits/chosen": -0.10046061873435974, + "logits/rejected": -0.10046061873435974, + "logps/chosen": -31.823505401611328, + "logps/rejected": -31.823505401611328, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.24391670525074005, + "rewards/margins": 0.0, + "rewards/rejected": -0.24391670525074005, + "step": 22 + }, + { + "epoch": 0.24210526315789474, + "grad_norm": 3.677838549265289e-07, + "learning_rate": 0.00019953684210526317, + "logits/chosen": -0.492754727602005, + "logits/rejected": -0.492754727602005, + "logps/chosen": -31.88969612121582, + "logps/rejected": -31.88969612121582, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.16309070587158203, + "rewards/margins": 0.0, + "rewards/rejected": -0.16309070587158203, + "step": 23 + }, + { + "epoch": 0.25263157894736843, + "grad_norm": 4.003734943580639e-07, + "learning_rate": 0.00019951578947368422, + "logits/chosen": -0.45025864243507385, + "logits/rejected": -0.45025864243507385, + "logps/chosen": -30.97303009033203, + "logps/rejected": -30.97303009033203, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.22819633781909943, + "rewards/margins": 0.0, + "rewards/rejected": -0.22819633781909943, + "step": 24 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 4.081555289303651e-07, + "learning_rate": 0.00019949473684210527, + "logits/chosen": -0.24635930359363556, + "logits/rejected": -0.24635930359363556, + "logps/chosen": -30.218860626220703, + "logps/rejected": -30.218860626220703, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.1945648193359375, + "rewards/margins": 0.0, + "rewards/rejected": -0.1945648193359375, + "step": 25 + }, + { + "epoch": 0.2736842105263158, + "grad_norm": 6.133328724899911e-07, + "learning_rate": 0.00019947368421052632, + "logits/chosen": 0.11159560084342957, + "logits/rejected": 0.11159560084342957, + "logps/chosen": -31.607969284057617, + "logps/rejected": -31.607969284057617, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.3136102855205536, + "rewards/margins": 0.0, + "rewards/rejected": -0.3136102855205536, + "step": 26 + }, + { + "epoch": 0.28421052631578947, + "grad_norm": 5.27759880242229e-07, + "learning_rate": 0.00019945263157894737, + "logits/chosen": -0.7260520458221436, + "logits/rejected": -0.7260520458221436, + "logps/chosen": -29.5076961517334, + "logps/rejected": -29.5076961517334, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.2245742827653885, + "rewards/margins": 0.0, + "rewards/rejected": -0.2245742827653885, + "step": 27 + }, + { + "epoch": 0.29473684210526313, + "grad_norm": 3.844326954549615e-07, + "learning_rate": 0.00019943157894736845, + "logits/chosen": -0.5129251480102539, + "logits/rejected": -0.5129251480102539, + "logps/chosen": -32.33815002441406, + "logps/rejected": -32.33815002441406, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.207936093211174, + "rewards/margins": 0.0, + "rewards/rejected": -0.207936093211174, + "step": 28 + }, + { + "epoch": 0.30526315789473685, + "grad_norm": 6.856218988104956e-07, + "learning_rate": 0.0001994105263157895, + "logits/chosen": 0.11357959359884262, + "logits/rejected": 0.11357959359884262, + "logps/chosen": -32.002952575683594, + "logps/rejected": -32.002952575683594, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.35310861468315125, + "rewards/margins": 0.0, + "rewards/rejected": -0.35310861468315125, + "step": 29 + }, + { + "epoch": 0.3157894736842105, + "grad_norm": 8.271791784864035e-07, + "learning_rate": 0.00019938947368421052, + "logits/chosen": 0.11376441270112991, + "logits/rejected": 0.11376441270112991, + "logps/chosen": -32.15091323852539, + "logps/rejected": -32.15091323852539, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.3679046630859375, + "rewards/margins": 0.0, + "rewards/rejected": -0.3679046630859375, + "step": 30 + }, + { + "epoch": 0.3263157894736842, + "grad_norm": 5.130211206960666e-07, + "learning_rate": 0.00019936842105263157, + "logits/chosen": -0.3664137125015259, + "logits/rejected": -0.3664137125015259, + "logps/chosen": -34.016319274902344, + "logps/rejected": -34.016319274902344, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.21675263345241547, + "rewards/margins": 0.0, + "rewards/rejected": -0.21675263345241547, + "step": 31 + }, + { + "epoch": 0.3368421052631579, + "grad_norm": 4.994833489035955e-07, + "learning_rate": 0.00019934736842105265, + "logits/chosen": -0.24606359004974365, + "logits/rejected": -0.24606359004974365, + "logps/chosen": -30.879606246948242, + "logps/rejected": -30.879606246948242, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.26063939929008484, + "rewards/margins": 0.0, + "rewards/rejected": -0.26063939929008484, + "step": 32 + }, + { + "epoch": 0.3473684210526316, + "grad_norm": 3.894836311246763e-07, + "learning_rate": 0.0001993263157894737, + "logits/chosen": -0.8016754984855652, + "logits/rejected": -0.8016754984855652, + "logps/chosen": -30.204538345336914, + "logps/rejected": -30.204538345336914, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.23887120187282562, + "rewards/margins": 0.0, + "rewards/rejected": -0.23887120187282562, + "step": 33 + }, + { + "epoch": 0.35789473684210527, + "grad_norm": 7.926749390207988e-07, + "learning_rate": 0.00019930526315789474, + "logits/chosen": 0.11515742540359497, + "logits/rejected": 0.11515742540359497, + "logps/chosen": -32.81196975708008, + "logps/rejected": -32.81196975708008, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.4340103268623352, + "rewards/margins": 0.0, + "rewards/rejected": -0.4340103268623352, + "step": 34 + }, + { + "epoch": 0.3684210526315789, + "grad_norm": 5.802260147902416e-07, + "learning_rate": 0.0001992842105263158, + "logits/chosen": -0.7546766996383667, + "logits/rejected": -0.7546766996383667, + "logps/chosen": -30.477094650268555, + "logps/rejected": -30.477094650268555, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.3215141296386719, + "rewards/margins": 0.0, + "rewards/rejected": -0.3215141296386719, + "step": 35 + }, + { + "epoch": 0.37894736842105264, + "grad_norm": 9.166934091808798e-07, + "learning_rate": 0.00019926315789473687, + "logits/chosen": -0.07739929854869843, + "logits/rejected": -0.07739929854869843, + "logps/chosen": -33.753623962402344, + "logps/rejected": -33.753623962402344, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.43692857027053833, + "rewards/margins": 0.0, + "rewards/rejected": -0.43692857027053833, + "step": 36 + }, + { + "epoch": 0.3894736842105263, + "grad_norm": 7.862927873247827e-07, + "learning_rate": 0.0001992421052631579, + "logits/chosen": -0.8069098591804504, + "logits/rejected": -0.8069098591804504, + "logps/chosen": -31.68317413330078, + "logps/rejected": -31.68317413330078, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.3709526062011719, + "rewards/margins": 0.0, + "rewards/rejected": -0.3709526062011719, + "step": 37 + }, + { + "epoch": 0.4, + "grad_norm": 5.130741556058638e-07, + "learning_rate": 0.00019922105263157894, + "logits/chosen": -0.5547216534614563, + "logits/rejected": -0.5547216534614563, + "logps/chosen": -33.31239318847656, + "logps/rejected": -33.31239318847656, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.3053604066371918, + "rewards/margins": 0.0, + "rewards/rejected": -0.3053604066371918, + "step": 38 + }, + { + "epoch": 0.4105263157894737, + "grad_norm": 8.027863032111782e-07, + "learning_rate": 0.00019920000000000002, + "logits/chosen": 0.11398957669734955, + "logits/rejected": 0.11398957669734955, + "logps/chosen": -33.665382385253906, + "logps/rejected": -33.665382385253906, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.519351601600647, + "rewards/margins": 0.0, + "rewards/rejected": -0.519351601600647, + "step": 39 + }, + { + "epoch": 0.42105263157894735, + "grad_norm": 6.375659040713799e-07, + "learning_rate": 0.00019917894736842107, + "logits/chosen": -0.38180088996887207, + "logits/rejected": -0.38180088996887207, + "logps/chosen": -33.36181640625, + "logps/rejected": -33.36181640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.4178833067417145, + "rewards/margins": 0.0, + "rewards/rejected": -0.4178833067417145, + "step": 40 + }, + { + "epoch": 0.43157894736842106, + "grad_norm": 1.0929778682111646e-06, + "learning_rate": 0.00019915789473684212, + "logits/chosen": -0.07893847674131393, + "logits/rejected": -0.07893847674131393, + "logps/chosen": -34.562191009521484, + "logps/rejected": -34.562191009521484, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.5177852511405945, + "rewards/margins": 0.0, + "rewards/rejected": -0.5177852511405945, + "step": 41 + }, + { + "epoch": 0.4421052631578947, + "grad_norm": 1.0339128948544385e-06, + "learning_rate": 0.00019913684210526317, + "logits/chosen": -0.07934816181659698, + "logits/rejected": -0.07934816181659698, + "logps/chosen": -34.74945831298828, + "logps/rejected": -34.74945831298828, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.536512017250061, + "rewards/margins": 0.0, + "rewards/rejected": -0.536512017250061, + "step": 42 + }, + { + "epoch": 0.45263157894736844, + "grad_norm": 6.676766020063951e-07, + "learning_rate": 0.00019911578947368421, + "logits/chosen": -0.4101516604423523, + "logits/rejected": -0.4101516604423523, + "logps/chosen": -35.05952453613281, + "logps/rejected": -35.05952453613281, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.32107314467430115, + "rewards/margins": 0.0, + "rewards/rejected": -0.32107314467430115, + "step": 43 + }, + { + "epoch": 0.4631578947368421, + "grad_norm": 5.393874857873016e-07, + "learning_rate": 0.00019909473684210526, + "logits/chosen": -0.5251553058624268, + "logits/rejected": -0.5251553058624268, + "logps/chosen": -32.90340805053711, + "logps/rejected": -32.90340805053711, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.421234130859375, + "rewards/margins": 0.0, + "rewards/rejected": -0.421234130859375, + "step": 44 + }, + { + "epoch": 0.47368421052631576, + "grad_norm": 5.218650471761066e-07, + "learning_rate": 0.0001990736842105263, + "logits/chosen": -0.5867052674293518, + "logits/rejected": -0.5867052674293518, + "logps/chosen": -34.09800720214844, + "logps/rejected": -34.09800720214844, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.3839218318462372, + "rewards/margins": 0.0, + "rewards/rejected": -0.3839218318462372, + "step": 45 + }, + { + "epoch": 0.4842105263157895, + "grad_norm": 5.97272503455315e-07, + "learning_rate": 0.0001990526315789474, + "logits/chosen": -0.5924510955810547, + "logits/rejected": -0.5924510955810547, + "logps/chosen": -34.22953796386719, + "logps/rejected": -34.22953796386719, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.3970749080181122, + "rewards/margins": 0.0, + "rewards/rejected": -0.3970749080181122, + "step": 46 + }, + { + "epoch": 0.49473684210526314, + "grad_norm": 5.906633759877877e-07, + "learning_rate": 0.00019903157894736844, + "logits/chosen": -0.8118030428886414, + "logits/rejected": -0.8118030428886414, + "logps/chosen": -32.110595703125, + "logps/rejected": -32.110595703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.4848642349243164, + "rewards/margins": 0.0, + "rewards/rejected": -0.4848642349243164, + "step": 47 + }, + { + "epoch": 0.5052631578947369, + "grad_norm": 6.80591313084733e-07, + "learning_rate": 0.0001990105263157895, + "logits/chosen": -0.25058746337890625, + "logits/rejected": -0.25058746337890625, + "logps/chosen": -32.47521209716797, + "logps/rejected": -32.47521209716797, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.4201999604701996, + "rewards/margins": 0.0, + "rewards/rejected": -0.4201999604701996, + "step": 48 + }, + { + "epoch": 0.5157894736842106, + "grad_norm": 6.16580393852928e-07, + "learning_rate": 0.00019898947368421054, + "logits/chosen": -0.6074540615081787, + "logits/rejected": -0.6074540615081787, + "logps/chosen": -34.65234375, + "logps/rejected": -34.65234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.4393554627895355, + "rewards/margins": 0.0, + "rewards/rejected": -0.4393554627895355, + "step": 49 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 1.2105505220461055e-06, + "learning_rate": 0.00019896842105263159, + "logits/chosen": -0.09993575513362885, + "logits/rejected": -0.09993575513362885, + "logps/chosen": -36.35881042480469, + "logps/rejected": -36.35881042480469, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.6974472403526306, + "rewards/margins": 0.0, + "rewards/rejected": -0.6974472403526306, + "step": 50 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -0.35655614733695984, + "eval_logits/rejected": -0.35655614733695984, + "eval_logps/chosen": -35.03644561767578, + "eval_logps/rejected": -35.03644561767578, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -0.6004651188850403, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -0.6004651188850403, + "eval_runtime": 4.5852, + "eval_samples_per_second": 2.181, + "eval_steps_per_second": 2.181, + "step": 50 + }, + { + "epoch": 0.5368421052631579, + "grad_norm": 1.3206732774051488e-06, + "learning_rate": 0.00019894736842105264, + "logits/chosen": -0.10419120639562607, + "logits/rejected": -0.10419120639562607, + "logps/chosen": -36.56370544433594, + "logps/rejected": -36.56370544433594, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.7179366946220398, + "rewards/margins": 0.0, + "rewards/rejected": -0.7179366946220398, + "step": 51 + }, + { + "epoch": 0.5473684210526316, + "grad_norm": 1.391149226037669e-06, + "learning_rate": 0.00019892631578947368, + "logits/chosen": -0.1077694371342659, + "logits/rejected": -0.1077694371342659, + "logps/chosen": -36.836097717285156, + "logps/rejected": -36.836097717285156, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.7451759576797485, + "rewards/margins": 0.0, + "rewards/rejected": -0.7451759576797485, + "step": 52 + }, + { + "epoch": 0.5578947368421052, + "grad_norm": 6.259977567424357e-07, + "learning_rate": 0.00019890526315789476, + "logits/chosen": -0.629492461681366, + "logits/rejected": -0.629492461681366, + "logps/chosen": -35.2900390625, + "logps/rejected": -35.2900390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.503125011920929, + "rewards/margins": 0.0, + "rewards/rejected": -0.503125011920929, + "step": 53 + }, + { + "epoch": 0.5684210526315789, + "grad_norm": 9.902968258757028e-07, + "learning_rate": 0.0001988842105263158, + "logits/chosen": -0.8901774287223816, + "logits/rejected": -0.8901774287223816, + "logps/chosen": -34.150787353515625, + "logps/rejected": -34.150787353515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.6177139282226562, + "rewards/margins": 0.0, + "rewards/rejected": -0.6177139282226562, + "step": 54 + }, + { + "epoch": 0.5789473684210527, + "grad_norm": 7.941444550851884e-07, + "learning_rate": 0.00019886315789473686, + "logits/chosen": -0.25454312562942505, + "logits/rejected": -0.25454312562942505, + "logps/chosen": -33.346412658691406, + "logps/rejected": -33.346412658691406, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.5073200464248657, + "rewards/margins": 0.0, + "rewards/rejected": -0.5073200464248657, + "step": 55 + }, + { + "epoch": 0.5894736842105263, + "grad_norm": 5.561834086620365e-07, + "learning_rate": 0.0001988421052631579, + "logits/chosen": -0.8666642904281616, + "logits/rejected": -0.8666642904281616, + "logps/chosen": -32.6674690246582, + "logps/rejected": -32.6674690246582, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.48516425490379333, + "rewards/margins": 0.0, + "rewards/rejected": -0.48516425490379333, + "step": 56 + }, + { + "epoch": 0.6, + "grad_norm": 1.085165763470286e-06, + "learning_rate": 0.00019882105263157896, + "logits/chosen": -0.9087679386138916, + "logits/rejected": -0.9087679386138916, + "logps/chosen": -34.766693115234375, + "logps/rejected": -34.766693115234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.6793045401573181, + "rewards/margins": 0.0, + "rewards/rejected": -0.6793045401573181, + "step": 57 + }, + { + "epoch": 0.6105263157894737, + "grad_norm": 6.484092978098488e-07, + "learning_rate": 0.0001988, + "logits/chosen": -0.8767624497413635, + "logits/rejected": -0.8767624497413635, + "logps/chosen": -33.77532196044922, + "logps/rejected": -33.77532196044922, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.6513368487358093, + "rewards/margins": 0.0, + "rewards/rejected": -0.6513368487358093, + "step": 58 + }, + { + "epoch": 0.6210526315789474, + "grad_norm": 5.605424462373776e-07, + "learning_rate": 0.00019877894736842106, + "logits/chosen": -0.8755089044570923, + "logits/rejected": -0.8755089044570923, + "logps/chosen": -33.11381149291992, + "logps/rejected": -33.11381149291992, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.5297985076904297, + "rewards/margins": 0.0, + "rewards/rejected": -0.5297985076904297, + "step": 59 + }, + { + "epoch": 0.631578947368421, + "grad_norm": 7.012636160652619e-07, + "learning_rate": 0.00019875789473684213, + "logits/chosen": -0.44773852825164795, + "logits/rejected": -0.44773852825164795, + "logps/chosen": -36.03230285644531, + "logps/rejected": -36.03230285644531, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.6849319338798523, + "rewards/margins": 0.0, + "rewards/rejected": -0.6849319338798523, + "step": 60 + }, + { + "epoch": 0.6421052631578947, + "grad_norm": 7.327849971261458e-07, + "learning_rate": 0.00019873684210526318, + "logits/chosen": -0.4542182385921478, + "logits/rejected": -0.4542182385921478, + "logps/chosen": -36.22119140625, + "logps/rejected": -36.22119140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.7038208246231079, + "rewards/margins": 0.0, + "rewards/rejected": -0.7038208246231079, + "step": 61 + }, + { + "epoch": 0.6526315789473685, + "grad_norm": 8.450563200312899e-07, + "learning_rate": 0.0001987157894736842, + "logits/chosen": -0.4958184063434601, + "logits/rejected": -0.4958184063434601, + "logps/chosen": -37.08246612548828, + "logps/rejected": -37.08246612548828, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.5233673453330994, + "rewards/margins": 0.0, + "rewards/rejected": -0.5233673453330994, + "step": 62 + }, + { + "epoch": 0.6631578947368421, + "grad_norm": 6.643780920967401e-07, + "learning_rate": 0.00019869473684210525, + "logits/chosen": -0.8882527351379395, + "logits/rejected": -0.8882527351379395, + "logps/chosen": -33.791908264160156, + "logps/rejected": -33.791908264160156, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.597608208656311, + "rewards/margins": 0.0, + "rewards/rejected": -0.597608208656311, + "step": 63 + }, + { + "epoch": 0.6736842105263158, + "grad_norm": 1.3917771184424055e-06, + "learning_rate": 0.00019867368421052633, + "logits/chosen": 0.06700892001390457, + "logits/rejected": 0.06700892001390457, + "logps/chosen": -38.569244384765625, + "logps/rejected": -38.569244384765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.0097378492355347, + "rewards/margins": 0.0, + "rewards/rejected": -1.0097378492355347, + "step": 64 + }, + { + "epoch": 0.6842105263157895, + "grad_norm": 1.605672423465876e-06, + "learning_rate": 0.00019865263157894738, + "logits/chosen": 0.060907840728759766, + "logits/rejected": 0.060907840728759766, + "logps/chosen": -38.83467483520508, + "logps/rejected": -38.83467483520508, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.036280870437622, + "rewards/margins": 0.0, + "rewards/rejected": -1.036280870437622, + "step": 65 + }, + { + "epoch": 0.6947368421052632, + "grad_norm": 1.30730154523917e-06, + "learning_rate": 0.00019863157894736843, + "logits/chosen": -0.19624063372612, + "logits/rejected": -0.19624063372612, + "logps/chosen": -40.40142822265625, + "logps/rejected": -40.40142822265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.101709008216858, + "rewards/margins": 0.0, + "rewards/rejected": -1.101709008216858, + "step": 66 + }, + { + "epoch": 0.7052631578947368, + "grad_norm": 1.4441985740631935e-06, + "learning_rate": 0.00019861052631578948, + "logits/chosen": 0.04746666178107262, + "logits/rejected": 0.04746666178107262, + "logps/chosen": -39.52649688720703, + "logps/rejected": -39.52649688720703, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.1054630279541016, + "rewards/margins": 0.0, + "rewards/rejected": -1.1054630279541016, + "step": 67 + }, + { + "epoch": 0.7157894736842105, + "grad_norm": 1.154596361629956e-06, + "learning_rate": 0.00019858947368421053, + "logits/chosen": -0.2756457030773163, + "logits/rejected": -0.2756457030773163, + "logps/chosen": -35.44649887084961, + "logps/rejected": -35.44649887084961, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.717328667640686, + "rewards/margins": 0.0, + "rewards/rejected": -0.717328667640686, + "step": 68 + }, + { + "epoch": 0.7263157894736842, + "grad_norm": 1.64998914442549e-06, + "learning_rate": 0.00019856842105263158, + "logits/chosen": 0.029611682519316673, + "logits/rejected": 0.029611682519316673, + "logps/chosen": -40.39375305175781, + "logps/rejected": -40.39375305175781, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.1921886205673218, + "rewards/margins": 0.0, + "rewards/rejected": -1.1921886205673218, + "step": 69 + }, + { + "epoch": 0.7368421052631579, + "grad_norm": 7.885269610596879e-07, + "learning_rate": 0.00019854736842105263, + "logits/chosen": -0.7255390286445618, + "logits/rejected": -0.7255390286445618, + "logps/chosen": -38.25248718261719, + "logps/rejected": -38.25248718261719, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.7993698120117188, + "rewards/margins": 0.0, + "rewards/rejected": -0.7993698120117188, + "step": 70 + }, + { + "epoch": 0.7473684210526316, + "grad_norm": 1.3629967270389898e-06, + "learning_rate": 0.0001985263157894737, + "logits/chosen": -0.283242791891098, + "logits/rejected": -0.283242791891098, + "logps/chosen": -36.21233367919922, + "logps/rejected": -36.21233367919922, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.7939121127128601, + "rewards/margins": 0.0, + "rewards/rejected": -0.7939121127128601, + "step": 71 + }, + { + "epoch": 0.7578947368421053, + "grad_norm": 1.0566282071522437e-06, + "learning_rate": 0.00019850526315789475, + "logits/chosen": -0.737543523311615, + "logits/rejected": -0.737543523311615, + "logps/chosen": -38.730262756347656, + "logps/rejected": -38.730262756347656, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.8471474051475525, + "rewards/margins": 0.0, + "rewards/rejected": -0.8471474051475525, + "step": 72 + }, + { + "epoch": 0.7684210526315789, + "grad_norm": 7.137317084016104e-07, + "learning_rate": 0.0001984842105263158, + "logits/chosen": -0.9320713877677917, + "logits/rejected": -0.9320713877677917, + "logps/chosen": -35.780574798583984, + "logps/rejected": -35.780574798583984, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.7964748740196228, + "rewards/margins": 0.0, + "rewards/rejected": -0.7964748740196228, + "step": 73 + }, + { + "epoch": 0.7789473684210526, + "grad_norm": 1.0629120197336306e-06, + "learning_rate": 0.00019846315789473685, + "logits/chosen": -0.7492873072624207, + "logits/rejected": -0.7492873072624207, + "logps/chosen": -39.273284912109375, + "logps/rejected": -39.273284912109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.9014496207237244, + "rewards/margins": 0.0, + "rewards/rejected": -0.9014496207237244, + "step": 74 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 1.5789109966135584e-06, + "learning_rate": 0.0001984421052631579, + "logits/chosen": -0.03452115133404732, + "logits/rejected": -0.03452115133404732, + "logps/chosen": -42.939208984375, + "logps/rejected": -42.939208984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.4467343091964722, + "rewards/margins": 0.0, + "rewards/rejected": -1.4467343091964722, + "step": 75 + }, + { + "epoch": 0.8, + "grad_norm": 1.606632508810435e-06, + "learning_rate": 0.00019842105263157895, + "logits/chosen": -0.3005423843860626, + "logits/rejected": -0.3005423843860626, + "logps/chosen": -37.789302825927734, + "logps/rejected": -37.789302825927734, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.9516090750694275, + "rewards/margins": 0.0, + "rewards/rejected": -0.9516090750694275, + "step": 76 + }, + { + "epoch": 0.8105263157894737, + "grad_norm": 2.1266146177367773e-06, + "learning_rate": 0.0001984, + "logits/chosen": -1.0360406637191772, + "logits/rejected": -1.0360406637191772, + "logps/chosen": -39.363311767578125, + "logps/rejected": -39.363311767578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.13896644115448, + "rewards/margins": 0.0, + "rewards/rejected": -1.13896644115448, + "step": 77 + }, + { + "epoch": 0.8210526315789474, + "grad_norm": 1.5555992831650656e-06, + "learning_rate": 0.00019837894736842107, + "logits/chosen": -0.3110392987728119, + "logits/rejected": -0.3110392987728119, + "logps/chosen": -38.595924377441406, + "logps/rejected": -38.595924377441406, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.0322712659835815, + "rewards/margins": 0.0, + "rewards/rejected": -1.0322712659835815, + "step": 78 + }, + { + "epoch": 0.8315789473684211, + "grad_norm": 1.6888642448975588e-06, + "learning_rate": 0.00019835789473684212, + "logits/chosen": -0.2862299978733063, + "logits/rejected": -0.2862299978733063, + "logps/chosen": -45.134254455566406, + "logps/rejected": -45.134254455566406, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.5749915838241577, + "rewards/margins": 0.0, + "rewards/rejected": -1.5749915838241577, + "step": 79 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 7.403574500131072e-07, + "learning_rate": 0.00019833684210526317, + "logits/chosen": -0.9862018823623657, + "logits/rejected": -0.9862018823623657, + "logps/chosen": -37.278656005859375, + "logps/rejected": -37.278656005859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.9462829828262329, + "rewards/margins": 0.0, + "rewards/rejected": -0.9462829828262329, + "step": 80 + }, + { + "epoch": 0.8526315789473684, + "grad_norm": 1.5618402358086314e-06, + "learning_rate": 0.00019831578947368422, + "logits/chosen": -0.32921895384788513, + "logits/rejected": -0.32921895384788513, + "logps/chosen": -40.038116455078125, + "logps/rejected": -40.038116455078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.1764904260635376, + "rewards/margins": 0.0, + "rewards/rejected": -1.1764904260635376, + "step": 81 + }, + { + "epoch": 0.8631578947368421, + "grad_norm": NaN, + "learning_rate": 0.00019831578947368422, + "logits/chosen": -1.0665150880813599, + "logits/rejected": -1.0665150880813599, + "logps/chosen": -40.92689514160156, + "logps/rejected": -40.92689514160156, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.295324683189392, + "rewards/margins": 0.0, + "rewards/rejected": -1.295324683189392, + "step": 82 + }, + { + "epoch": 0.8736842105263158, + "grad_norm": 8.206494612750248e-07, + "learning_rate": 0.00019829473684210527, + "logits/chosen": -1.011318564414978, + "logits/rejected": -1.011318564414978, + "logps/chosen": -37.846710205078125, + "logps/rejected": -37.846710205078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.003088355064392, + "rewards/margins": 0.0, + "rewards/rejected": -1.003088355064392, + "step": 83 + }, + { + "epoch": 0.8842105263157894, + "grad_norm": 1.1817699032690143e-06, + "learning_rate": 0.00019827368421052632, + "logits/chosen": -0.8081866502761841, + "logits/rejected": -0.8081866502761841, + "logps/chosen": -42.06470489501953, + "logps/rejected": -42.06470489501953, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.1805915832519531, + "rewards/margins": 0.0, + "rewards/rejected": -1.1805915832519531, + "step": 84 + }, + { + "epoch": 0.8947368421052632, + "grad_norm": 1.2260217090442893e-06, + "learning_rate": 0.00019825263157894737, + "logits/chosen": -0.7983872294425964, + "logits/rejected": -0.7983872294425964, + "logps/chosen": -40.32405090332031, + "logps/rejected": -40.32405090332031, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.163298487663269, + "rewards/margins": 0.0, + "rewards/rejected": -1.163298487663269, + "step": 85 + }, + { + "epoch": 0.9052631578947369, + "grad_norm": 1.0451011576151359e-06, + "learning_rate": 0.00019823157894736845, + "logits/chosen": -0.6131337285041809, + "logits/rejected": -0.6131337285041809, + "logps/chosen": -41.054622650146484, + "logps/rejected": -41.054622650146484, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.9205829501152039, + "rewards/margins": 0.0, + "rewards/rejected": -0.9205829501152039, + "step": 86 + }, + { + "epoch": 0.9157894736842105, + "grad_norm": 1.6260490838249098e-06, + "learning_rate": 0.0001982105263157895, + "logits/chosen": -0.36974355578422546, + "logits/rejected": -0.36974355578422546, + "logps/chosen": -42.681610107421875, + "logps/rejected": -42.681610107421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.4408397674560547, + "rewards/margins": 0.0, + "rewards/rejected": -1.4408397674560547, + "step": 87 + }, + { + "epoch": 0.9263157894736842, + "grad_norm": 2.0204777229082538e-06, + "learning_rate": 0.00019818947368421052, + "logits/chosen": -0.8440442085266113, + "logits/rejected": -0.8440442085266113, + "logps/chosen": -43.71595764160156, + "logps/rejected": -43.71595764160156, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.3457168340682983, + "rewards/margins": 0.0, + "rewards/rejected": -1.3457168340682983, + "step": 88 + }, + { + "epoch": 0.9368421052631579, + "grad_norm": NaN, + "learning_rate": 0.00019818947368421052, + "logits/chosen": -0.8557513952255249, + "logits/rejected": -0.8557513952255249, + "logps/chosen": -44.365943908691406, + "logps/rejected": -44.365943908691406, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.4107154607772827, + "rewards/margins": 0.0, + "rewards/rejected": -1.4107154607772827, + "step": 89 + }, + { + "epoch": 0.9473684210526315, + "grad_norm": 7.714015737292357e-07, + "learning_rate": 0.0001981684210526316, + "logits/chosen": -1.0934278964996338, + "logits/rejected": -1.0934278964996338, + "logps/chosen": -39.80454635620117, + "logps/rejected": -39.80454635620117, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.1988719701766968, + "rewards/margins": 0.0, + "rewards/rejected": -1.1988719701766968, + "step": 90 + }, + { + "epoch": 0.9578947368421052, + "grad_norm": 9.797598750083125e-07, + "learning_rate": 0.00019814736842105264, + "logits/chosen": -0.8375434875488281, + "logits/rejected": -0.8375434875488281, + "logps/chosen": -42.35945129394531, + "logps/rejected": -42.35945129394531, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.3668384552001953, + "rewards/margins": 0.0, + "rewards/rejected": -1.3668384552001953, + "step": 91 + }, + { + "epoch": 0.968421052631579, + "grad_norm": 1.3935992910774075e-06, + "learning_rate": 0.0001981263157894737, + "logits/chosen": -0.841162919998169, + "logits/rejected": -0.841162919998169, + "logps/chosen": -42.723976135253906, + "logps/rejected": -42.723976135253906, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.4032909870147705, + "rewards/margins": 0.0, + "rewards/rejected": -1.4032909870147705, + "step": 92 + }, + { + "epoch": 0.9789473684210527, + "grad_norm": NaN, + "learning_rate": 0.0001981263157894737, + "logits/chosen": -1.1410332918167114, + "logits/rejected": -1.1410332918167114, + "logps/chosen": -40.401309967041016, + "logps/rejected": -40.401309967041016, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.258548378944397, + "rewards/margins": 0.0, + "rewards/rejected": -1.258548378944397, + "step": 93 + }, + { + "epoch": 0.9894736842105263, + "grad_norm": 1.48494746099459e-06, + "learning_rate": 0.00019810526315789474, + "logits/chosen": -1.1042628288269043, + "logits/rejected": -1.1042628288269043, + "logps/chosen": -39.81610107421875, + "logps/rejected": -39.81610107421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.2554148435592651, + "rewards/margins": 0.0, + "rewards/rejected": -1.2554148435592651, + "step": 94 + }, + { + "epoch": 1.0, + "grad_norm": 2.16625994653441e-06, + "learning_rate": 0.00019808421052631582, + "logits/chosen": -0.42906633019447327, + "logits/rejected": -0.42906633019447327, + "logps/chosen": -45.888736724853516, + "logps/rejected": -45.888736724853516, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.7615524530410767, + "rewards/margins": 0.0, + "rewards/rejected": -1.7615524530410767, + "step": 95 + }, + { + "epoch": 1.0105263157894737, + "grad_norm": 3.047139443879132e-06, + "learning_rate": 0.00019806315789473687, + "logits/chosen": -0.4366866648197174, + "logits/rejected": -0.4366866648197174, + "logps/chosen": -46.279537200927734, + "logps/rejected": -46.279537200927734, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.8006324768066406, + "rewards/margins": 0.0, + "rewards/rejected": -1.8006324768066406, + "step": 96 + }, + { + "epoch": 1.0210526315789474, + "grad_norm": 3.3121996239060536e-06, + "learning_rate": 0.0001980421052631579, + "logits/chosen": -0.4385872781276703, + "logits/rejected": -0.4385872781276703, + "logps/chosen": -46.83531951904297, + "logps/rejected": -46.83531951904297, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.856210708618164, + "rewards/margins": 0.0, + "rewards/rejected": -1.856210708618164, + "step": 97 + }, + { + "epoch": 1.0315789473684212, + "grad_norm": NaN, + "learning_rate": 0.0001980421052631579, + "logits/chosen": -1.1934232711791992, + "logits/rejected": -1.1934232711791992, + "logps/chosen": -40.99372482299805, + "logps/rejected": -40.99372482299805, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.317789912223816, + "rewards/margins": 0.0, + "rewards/rejected": -1.317789912223816, + "step": 98 + }, + { + "epoch": 1.0421052631578946, + "grad_norm": 1.223079038936703e-06, + "learning_rate": 0.00019802105263157894, + "logits/chosen": -0.9469152092933655, + "logits/rejected": -0.9469152092933655, + "logps/chosen": -47.82305908203125, + "logps/rejected": -47.82305908203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.7564270496368408, + "rewards/margins": 0.0, + "rewards/rejected": -1.7564270496368408, + "step": 99 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 3.1345925890491344e-06, + "learning_rate": 0.00019800000000000002, + "logits/chosen": -0.6653971076011658, + "logits/rejected": -0.6653971076011658, + "logps/chosen": -43.97892761230469, + "logps/rejected": -43.97892761230469, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.2130135297775269, + "rewards/margins": 0.0, + "rewards/rejected": -1.2130135297775269, + "step": 100 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -0.6583244800567627, + "eval_logits/rejected": -0.6583244800567627, + "eval_logps/chosen": -47.84056854248047, + "eval_logps/rejected": -47.84056854248047, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -1.880876898765564, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -1.880876898765564, + "eval_runtime": 4.4813, + "eval_samples_per_second": 2.231, + "eval_steps_per_second": 2.231, + "step": 100 + }, + { + "epoch": 1.063157894736842, + "grad_norm": 1.936478838615585e-06, + "learning_rate": 0.00019797894736842106, + "logits/chosen": -0.43286311626434326, + "logits/rejected": -0.43286311626434326, + "logps/chosen": -49.19889831542969, + "logps/rejected": -49.19889831542969, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.0925686359405518, + "rewards/margins": 0.0, + "rewards/rejected": -2.0925686359405518, + "step": 101 + }, + { + "epoch": 1.0736842105263158, + "grad_norm": 5.185303962207399e-06, + "learning_rate": 0.00019795789473684211, + "logits/chosen": -1.1521388292312622, + "logits/rejected": -1.1521388292312622, + "logps/chosen": -46.65351486206055, + "logps/rejected": -46.65351486206055, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.8679866790771484, + "rewards/margins": 0.0, + "rewards/rejected": -1.8679866790771484, + "step": 102 + }, + { + "epoch": 1.0842105263157895, + "grad_norm": 8.595636700192699e-07, + "learning_rate": 0.00019793684210526316, + "logits/chosen": -1.2388026714324951, + "logits/rejected": -1.2388026714324951, + "logps/chosen": -42.79230880737305, + "logps/rejected": -42.79230880737305, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.4976482391357422, + "rewards/margins": 0.0, + "rewards/rejected": -1.4976482391357422, + "step": 103 + }, + { + "epoch": 1.0947368421052632, + "grad_norm": 2.0735121779580368e-06, + "learning_rate": 0.0001979157894736842, + "logits/chosen": -0.25976407527923584, + "logits/rejected": -0.25976407527923584, + "logps/chosen": -53.21000289916992, + "logps/rejected": -53.21000289916992, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.473813772201538, + "rewards/margins": 0.0, + "rewards/rejected": -2.473813772201538, + "step": 104 + }, + { + "epoch": 1.1052631578947367, + "grad_norm": 2.4458829557261197e-06, + "learning_rate": 0.00019789473684210526, + "logits/chosen": -1.2645772695541382, + "logits/rejected": -1.2645772695541382, + "logps/chosen": -48.962562561035156, + "logps/rejected": -48.962562561035156, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.098891496658325, + "rewards/margins": 0.0, + "rewards/rejected": -2.098891496658325, + "step": 105 + }, + { + "epoch": 1.1157894736842104, + "grad_norm": 1.8741600342764286e-06, + "learning_rate": 0.0001978736842105263, + "logits/chosen": -1.3398991823196411, + "logits/rejected": -1.3398991823196411, + "logps/chosen": -43.861549377441406, + "logps/rejected": -43.861549377441406, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.6599596738815308, + "rewards/margins": 0.0, + "rewards/rejected": -1.6599596738815308, + "step": 106 + }, + { + "epoch": 1.1263157894736842, + "grad_norm": 2.5885437935357913e-06, + "learning_rate": 0.0001978526315789474, + "logits/chosen": -0.29697465896606445, + "logits/rejected": -0.29697465896606445, + "logps/chosen": -55.49425506591797, + "logps/rejected": -55.49425506591797, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.7022387981414795, + "rewards/margins": 0.0, + "rewards/rejected": -2.7022387981414795, + "step": 107 + }, + { + "epoch": 1.1368421052631579, + "grad_norm": 1.910174660224584e-06, + "learning_rate": 0.00019783157894736844, + "logits/chosen": -0.7355058193206787, + "logits/rejected": -0.7355058193206787, + "logps/chosen": -46.72576141357422, + "logps/rejected": -46.72576141357422, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.7542778253555298, + "rewards/margins": 0.0, + "rewards/rejected": -1.7542778253555298, + "step": 108 + }, + { + "epoch": 1.1473684210526316, + "grad_norm": 1.6612037825325388e-06, + "learning_rate": 0.00019781052631578949, + "logits/chosen": -1.1229007244110107, + "logits/rejected": -1.1229007244110107, + "logps/chosen": -50.912113189697266, + "logps/rejected": -50.912113189697266, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.0653324127197266, + "rewards/margins": 0.0, + "rewards/rejected": -2.0653324127197266, + "step": 109 + }, + { + "epoch": 1.1578947368421053, + "grad_norm": 1.6714056982891634e-06, + "learning_rate": 0.00019778947368421053, + "logits/chosen": -1.4076998233795166, + "logits/rejected": -1.4076998233795166, + "logps/chosen": -46.0490837097168, + "logps/rejected": -46.0490837097168, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.8787130117416382, + "rewards/margins": 0.0, + "rewards/rejected": -1.8787130117416382, + "step": 110 + }, + { + "epoch": 1.168421052631579, + "grad_norm": 2.404258111710078e-06, + "learning_rate": 0.00019776842105263158, + "logits/chosen": -0.4164087772369385, + "logits/rejected": -0.4164087772369385, + "logps/chosen": -55.265403747558594, + "logps/rejected": -55.265403747558594, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.699219226837158, + "rewards/margins": 0.0, + "rewards/rejected": -2.699219226837158, + "step": 111 + }, + { + "epoch": 1.1789473684210527, + "grad_norm": 2.5129768346232595e-06, + "learning_rate": 0.00019774736842105263, + "logits/chosen": -0.48479190468788147, + "logits/rejected": -0.48479190468788147, + "logps/chosen": -57.135475158691406, + "logps/rejected": -57.135475158691406, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.775113821029663, + "rewards/margins": 0.0, + "rewards/rejected": -2.775113821029663, + "step": 112 + }, + { + "epoch": 1.1894736842105262, + "grad_norm": 1.6344749838026473e-06, + "learning_rate": 0.00019772631578947368, + "logits/chosen": -1.1951954364776611, + "logits/rejected": -1.1951954364776611, + "logps/chosen": -53.28643798828125, + "logps/rejected": -53.28643798828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.302764892578125, + "rewards/margins": 0.0, + "rewards/rejected": -2.302764892578125, + "step": 113 + }, + { + "epoch": 1.2, + "grad_norm": 2.0741767912113573e-06, + "learning_rate": 0.00019770526315789476, + "logits/chosen": -1.1239030361175537, + "logits/rejected": -1.1239030361175537, + "logps/chosen": -53.50406265258789, + "logps/rejected": -53.50406265258789, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.481299638748169, + "rewards/margins": 0.0, + "rewards/rejected": -2.481299638748169, + "step": 114 + }, + { + "epoch": 1.2105263157894737, + "grad_norm": 3.643415084297885e-06, + "learning_rate": 0.0001976842105263158, + "logits/chosen": -0.4708452522754669, + "logits/rejected": -0.4708452522754669, + "logps/chosen": -63.93821334838867, + "logps/rejected": -63.93821334838867, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.5466346740722656, + "rewards/margins": 0.0, + "rewards/rejected": -3.5466346740722656, + "step": 115 + }, + { + "epoch": 1.2210526315789474, + "grad_norm": 3.5905320601159474e-06, + "learning_rate": 0.00019766315789473686, + "logits/chosen": -0.44383522868156433, + "logits/rejected": -0.44383522868156433, + "logps/chosen": -59.75299072265625, + "logps/rejected": -59.75299072265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.147977828979492, + "rewards/margins": 0.0, + "rewards/rejected": -3.147977828979492, + "step": 116 + }, + { + "epoch": 1.231578947368421, + "grad_norm": 3.9981105146580376e-06, + "learning_rate": 0.0001976421052631579, + "logits/chosen": -0.551936686038971, + "logits/rejected": -0.551936686038971, + "logps/chosen": -61.86214828491211, + "logps/rejected": -61.86214828491211, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.2477810382843018, + "rewards/margins": 0.0, + "rewards/rejected": -3.2477810382843018, + "step": 117 + }, + { + "epoch": 1.2421052631578948, + "grad_norm": 3.1762576782057295e-06, + "learning_rate": 0.00019762105263157896, + "logits/chosen": -1.1889318227767944, + "logits/rejected": -1.1889318227767944, + "logps/chosen": -57.67156219482422, + "logps/rejected": -57.67156219482422, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.8980495929718018, + "rewards/margins": 0.0, + "rewards/rejected": -2.8980495929718018, + "step": 118 + }, + { + "epoch": 1.2526315789473683, + "grad_norm": 1.6450478597107576e-06, + "learning_rate": 0.0001976, + "logits/chosen": -1.4779409170150757, + "logits/rejected": -1.4779409170150757, + "logps/chosen": -52.22726058959961, + "logps/rejected": -52.22726058959961, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.496530771255493, + "rewards/margins": 0.0, + "rewards/rejected": -2.496530771255493, + "step": 119 + }, + { + "epoch": 1.263157894736842, + "grad_norm": 4.226046712574316e-06, + "learning_rate": 0.00019757894736842105, + "logits/chosen": -0.6531275510787964, + "logits/rejected": -0.6531275510787964, + "logps/chosen": -72.43806457519531, + "logps/rejected": -72.43806457519531, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.39661979675293, + "rewards/margins": 0.0, + "rewards/rejected": -4.39661979675293, + "step": 120 + }, + { + "epoch": 1.2736842105263158, + "grad_norm": 4.228716534271371e-06, + "learning_rate": 0.00019755789473684213, + "logits/chosen": -0.6977147459983826, + "logits/rejected": -0.6977147459983826, + "logps/chosen": -74.83479309082031, + "logps/rejected": -74.83479309082031, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.636292934417725, + "rewards/margins": 0.0, + "rewards/rejected": -4.636292934417725, + "step": 121 + }, + { + "epoch": 1.2842105263157895, + "grad_norm": 4.3411905608081724e-06, + "learning_rate": 0.00019753684210526318, + "logits/chosen": -0.5020732879638672, + "logits/rejected": -0.5020732879638672, + "logps/chosen": -68.32476806640625, + "logps/rejected": -68.32476806640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.005155563354492, + "rewards/margins": 0.0, + "rewards/rejected": -4.005155563354492, + "step": 122 + }, + { + "epoch": 1.2947368421052632, + "grad_norm": 5.095363121654373e-06, + "learning_rate": 0.0001975157894736842, + "logits/chosen": -0.7868385314941406, + "logits/rejected": -0.7868385314941406, + "logps/chosen": -80.11665344238281, + "logps/rejected": -80.11665344238281, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.164478778839111, + "rewards/margins": 0.0, + "rewards/rejected": -5.164478778839111, + "step": 123 + }, + { + "epoch": 1.305263157894737, + "grad_norm": 2.0824199964408763e-06, + "learning_rate": 0.00019749473684210528, + "logits/chosen": -1.533855676651001, + "logits/rejected": -1.533855676651001, + "logps/chosen": -49.14099884033203, + "logps/rejected": -49.14099884033203, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.1325173377990723, + "rewards/margins": 0.0, + "rewards/rejected": -2.1325173377990723, + "step": 124 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 5.472765678860014e-06, + "learning_rate": 0.00019747368421052633, + "logits/chosen": -1.0018510818481445, + "logits/rejected": -1.0018510818481445, + "logps/chosen": -57.671409606933594, + "logps/rejected": -57.671409606933594, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.582261800765991, + "rewards/margins": 0.0, + "rewards/rejected": -2.582261800765991, + "step": 125 + }, + { + "epoch": 1.3263157894736843, + "grad_norm": 4.48773016614723e-06, + "learning_rate": 0.00019745263157894738, + "logits/chosen": -1.3305102586746216, + "logits/rejected": -1.3305102586746216, + "logps/chosen": -69.93067932128906, + "logps/rejected": -69.93067932128906, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.123961448669434, + "rewards/margins": 0.0, + "rewards/rejected": -4.123961448669434, + "step": 126 + }, + { + "epoch": 1.3368421052631578, + "grad_norm": 4.489726961764973e-06, + "learning_rate": 0.00019743157894736843, + "logits/chosen": -1.3374897241592407, + "logits/rejected": -1.3374897241592407, + "logps/chosen": -72.15799713134766, + "logps/rejected": -72.15799713134766, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.34669303894043, + "rewards/margins": 0.0, + "rewards/rejected": -4.34669303894043, + "step": 127 + }, + { + "epoch": 1.3473684210526315, + "grad_norm": 2.618641701701563e-06, + "learning_rate": 0.0001974105263157895, + "logits/chosen": -1.471704125404358, + "logits/rejected": -1.471704125404358, + "logps/chosen": -68.04132080078125, + "logps/rejected": -68.04132080078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.7782533168792725, + "rewards/margins": 0.0, + "rewards/rejected": -3.7782533168792725, + "step": 128 + }, + { + "epoch": 1.3578947368421053, + "grad_norm": 2.595986188680399e-06, + "learning_rate": 0.00019738947368421055, + "logits/chosen": -1.5000438690185547, + "logits/rejected": -1.5000438690185547, + "logps/chosen": -60.63397216796875, + "logps/rejected": -60.63397216796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.3372018337249756, + "rewards/margins": 0.0, + "rewards/rejected": -3.3372018337249756, + "step": 129 + }, + { + "epoch": 1.368421052631579, + "grad_norm": 4.121559413761133e-06, + "learning_rate": 0.00019736842105263157, + "logits/chosen": -0.7774177193641663, + "logits/rejected": -0.7774177193641663, + "logps/chosen": -79.11760711669922, + "logps/rejected": -79.11760711669922, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.973327159881592, + "rewards/margins": 0.0, + "rewards/rejected": -4.973327159881592, + "step": 130 + }, + { + "epoch": 1.3789473684210527, + "grad_norm": 2.6745083232526667e-06, + "learning_rate": 0.00019734736842105262, + "logits/chosen": -1.5010743141174316, + "logits/rejected": -1.5010743141174316, + "logps/chosen": -72.77798461914062, + "logps/rejected": -72.77798461914062, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.251919746398926, + "rewards/margins": 0.0, + "rewards/rejected": -4.251919746398926, + "step": 131 + }, + { + "epoch": 1.3894736842105262, + "grad_norm": 6.172317171149189e-06, + "learning_rate": 0.0001973263157894737, + "logits/chosen": -0.6379980444908142, + "logits/rejected": -0.6379980444908142, + "logps/chosen": -86.76994323730469, + "logps/rejected": -86.76994323730469, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.849673271179199, + "rewards/margins": 0.0, + "rewards/rejected": -5.849673271179199, + "step": 132 + }, + { + "epoch": 1.4, + "grad_norm": 2.97183146358293e-06, + "learning_rate": 0.00019730526315789475, + "logits/chosen": -1.270950436592102, + "logits/rejected": -1.270950436592102, + "logps/chosen": -66.19747161865234, + "logps/rejected": -66.19747161865234, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.701448917388916, + "rewards/margins": 0.0, + "rewards/rejected": -3.701448917388916, + "step": 133 + }, + { + "epoch": 1.4105263157894736, + "grad_norm": 3.7268039250193397e-06, + "learning_rate": 0.0001972842105263158, + "logits/chosen": -1.5355697870254517, + "logits/rejected": -1.5355697870254517, + "logps/chosen": -78.51725769042969, + "logps/rejected": -78.51725769042969, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.825847148895264, + "rewards/margins": 0.0, + "rewards/rejected": -4.825847148895264, + "step": 134 + }, + { + "epoch": 1.4210526315789473, + "grad_norm": 7.989572623046115e-06, + "learning_rate": 0.00019726315789473685, + "logits/chosen": -0.9482132792472839, + "logits/rejected": -0.9482132792472839, + "logps/chosen": -105.5180435180664, + "logps/rejected": -105.5180435180664, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.704617500305176, + "rewards/margins": 0.0, + "rewards/rejected": -7.704617500305176, + "step": 135 + }, + { + "epoch": 1.431578947368421, + "grad_norm": 5.123813025420532e-06, + "learning_rate": 0.0001972421052631579, + "logits/chosen": -0.8642720580101013, + "logits/rejected": -0.8642720580101013, + "logps/chosen": -89.34291076660156, + "logps/rejected": -89.34291076660156, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.995857238769531, + "rewards/margins": 0.0, + "rewards/rejected": -5.995857238769531, + "step": 136 + }, + { + "epoch": 1.4421052631578948, + "grad_norm": 7.485965397791006e-06, + "learning_rate": 0.00019722105263157895, + "logits/chosen": -1.2587380409240723, + "logits/rejected": -1.2587380409240723, + "logps/chosen": -79.7330322265625, + "logps/rejected": -79.7330322265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.788424015045166, + "rewards/margins": 0.0, + "rewards/rejected": -4.788424015045166, + "step": 137 + }, + { + "epoch": 1.4526315789473685, + "grad_norm": 1.5202248278001207e-06, + "learning_rate": 0.0001972, + "logits/chosen": -1.5258516073226929, + "logits/rejected": -1.5258516073226929, + "logps/chosen": -57.422080993652344, + "logps/rejected": -57.422080993652344, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.960625410079956, + "rewards/margins": 0.0, + "rewards/rejected": -2.960625410079956, + "step": 138 + }, + { + "epoch": 1.4631578947368422, + "grad_norm": 5.481057996803429e-06, + "learning_rate": 0.00019717894736842107, + "logits/chosen": -1.6797330379486084, + "logits/rejected": -1.6797330379486084, + "logps/chosen": -85.02195739746094, + "logps/rejected": -85.02195739746094, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.704831123352051, + "rewards/margins": 0.0, + "rewards/rejected": -5.704831123352051, + "step": 139 + }, + { + "epoch": 1.4736842105263157, + "grad_norm": 1.5807910358489607e-06, + "learning_rate": 0.00019715789473684212, + "logits/chosen": -1.524339199066162, + "logits/rejected": -1.524339199066162, + "logps/chosen": -59.21200942993164, + "logps/rejected": -59.21200942993164, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.139618396759033, + "rewards/margins": 0.0, + "rewards/rejected": -3.139618396759033, + "step": 140 + }, + { + "epoch": 1.4842105263157894, + "grad_norm": 1.2956250429851934e-05, + "learning_rate": 0.00019713684210526317, + "logits/chosen": -1.0321005582809448, + "logits/rejected": -1.0321005582809448, + "logps/chosen": -124.25344848632812, + "logps/rejected": -124.25344848632812, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -9.578158378601074, + "rewards/margins": 0.0, + "rewards/rejected": -9.578158378601074, + "step": 141 + }, + { + "epoch": 1.4947368421052631, + "grad_norm": 4.446477305464214e-06, + "learning_rate": 0.00019711578947368422, + "logits/chosen": -1.4601666927337646, + "logits/rejected": -1.4601666927337646, + "logps/chosen": -109.08540344238281, + "logps/rejected": -109.08540344238281, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.039433479309082, + "rewards/margins": 0.0, + "rewards/rejected": -8.039433479309082, + "step": 142 + }, + { + "epoch": 1.5052631578947369, + "grad_norm": 6.148378815851174e-06, + "learning_rate": 0.00019709473684210527, + "logits/chosen": -1.7236757278442383, + "logits/rejected": -1.7236757278442383, + "logps/chosen": -95.50958251953125, + "logps/rejected": -95.50958251953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.753593444824219, + "rewards/margins": 0.0, + "rewards/rejected": -6.753593444824219, + "step": 143 + }, + { + "epoch": 1.5157894736842106, + "grad_norm": 2.541372623454663e-06, + "learning_rate": 0.00019707368421052632, + "logits/chosen": -1.5324490070343018, + "logits/rejected": -1.5324490070343018, + "logps/chosen": -63.43986892700195, + "logps/rejected": -63.43986892700195, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.5624043941497803, + "rewards/margins": 0.0, + "rewards/rejected": -3.5624043941497803, + "step": 144 + }, + { + "epoch": 1.526315789473684, + "grad_norm": 1.5561738109681755e-05, + "learning_rate": 0.00019705263157894737, + "logits/chosen": -1.092746615409851, + "logits/rejected": -1.092746615409851, + "logps/chosen": -140.5499267578125, + "logps/rejected": -140.5499267578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -11.207806587219238, + "rewards/margins": 0.0, + "rewards/rejected": -11.207806587219238, + "step": 145 + }, + { + "epoch": 1.5368421052631578, + "grad_norm": 1.0378664228483103e-05, + "learning_rate": 0.00019703157894736844, + "logits/chosen": -1.0650830268859863, + "logits/rejected": -1.0650830268859863, + "logps/chosen": -115.74560546875, + "logps/rejected": -115.74560546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.636126518249512, + "rewards/margins": 0.0, + "rewards/rejected": -8.636126518249512, + "step": 146 + }, + { + "epoch": 1.5473684210526315, + "grad_norm": 7.213519438664662e-06, + "learning_rate": 0.0001970105263157895, + "logits/chosen": -1.0977295637130737, + "logits/rejected": -1.0977295637130737, + "logps/chosen": -120.56465148925781, + "logps/rejected": -120.56465148925781, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -9.11803150177002, + "rewards/margins": 0.0, + "rewards/rejected": -9.11803150177002, + "step": 147 + }, + { + "epoch": 1.5578947368421052, + "grad_norm": 1.71927113115089e-05, + "learning_rate": 0.00019698947368421054, + "logits/chosen": -1.1391836404800415, + "logits/rejected": -1.1391836404800415, + "logps/chosen": -156.61782836914062, + "logps/rejected": -156.61782836914062, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -12.814597129821777, + "rewards/margins": 0.0, + "rewards/rejected": -12.814597129821777, + "step": 148 + }, + { + "epoch": 1.568421052631579, + "grad_norm": 1.0912938705587294e-05, + "learning_rate": 0.0001969684210526316, + "logits/chosen": -1.1804676055908203, + "logits/rejected": -1.1804676055908203, + "logps/chosen": -132.6087646484375, + "logps/rejected": -132.6087646484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -10.322443008422852, + "rewards/margins": 0.0, + "rewards/rejected": -10.322443008422852, + "step": 149 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 5.256761141936295e-06, + "learning_rate": 0.00019694736842105264, + "logits/chosen": -1.7506847381591797, + "logits/rejected": -1.7506847381591797, + "logps/chosen": -113.24552154541016, + "logps/rejected": -113.24552154541016, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.298673629760742, + "rewards/margins": 0.0, + "rewards/rejected": -8.298673629760742, + "step": 150 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.4358314275741577, + "eval_logits/rejected": -1.4358314275741577, + "eval_logps/chosen": -133.2693634033203, + "eval_logps/rejected": -133.2693634033203, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -10.42375659942627, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -10.42375659942627, + "eval_runtime": 4.4621, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 2.241, + "step": 150 + }, + { + "epoch": 1.5894736842105264, + "grad_norm": 5.490222520165844e-06, + "learning_rate": 0.0001969263157894737, + "logits/chosen": -1.763961672782898, + "logits/rejected": -1.763961672782898, + "logps/chosen": -116.49879455566406, + "logps/rejected": -116.49879455566406, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.624000549316406, + "rewards/margins": 0.0, + "rewards/rejected": -8.624000549316406, + "step": 151 + }, + { + "epoch": 1.6, + "grad_norm": 9.477753337705508e-06, + "learning_rate": 0.00019690526315789474, + "logits/chosen": -1.051688551902771, + "logits/rejected": -1.051688551902771, + "logps/chosen": -140.0754852294922, + "logps/rejected": -140.0754852294922, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -11.180228233337402, + "rewards/margins": 0.0, + "rewards/rejected": -11.180228233337402, + "step": 152 + }, + { + "epoch": 1.6105263157894738, + "grad_norm": 9.789489013201091e-06, + "learning_rate": 0.00019688421052631582, + "logits/chosen": -1.5712676048278809, + "logits/rejected": -1.5712676048278809, + "logps/chosen": -134.6369171142578, + "logps/rejected": -134.6369171142578, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -10.278812408447266, + "rewards/margins": 0.0, + "rewards/rejected": -10.278812408447266, + "step": 153 + }, + { + "epoch": 1.6210526315789475, + "grad_norm": 1.5471950973733328e-05, + "learning_rate": 0.00019686315789473687, + "logits/chosen": -1.3094755411148071, + "logits/rejected": -1.3094755411148071, + "logps/chosen": -167.6383819580078, + "logps/rejected": -167.6383819580078, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -13.825404167175293, + "rewards/margins": 0.0, + "rewards/rejected": -13.825404167175293, + "step": 154 + }, + { + "epoch": 1.631578947368421, + "grad_norm": 2.7842013423651224e-06, + "learning_rate": 0.0001968421052631579, + "logits/chosen": -1.6425766944885254, + "logits/rejected": -1.6425766944885254, + "logps/chosen": -79.62688446044922, + "logps/rejected": -79.62688446044922, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.181106090545654, + "rewards/margins": 0.0, + "rewards/rejected": -5.181106090545654, + "step": 155 + }, + { + "epoch": 1.6421052631578947, + "grad_norm": 5.890120519325137e-06, + "learning_rate": 0.00019682105263157896, + "logits/chosen": -1.5033855438232422, + "logits/rejected": -1.5033855438232422, + "logps/chosen": -121.18205261230469, + "logps/rejected": -121.18205261230469, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -9.199907302856445, + "rewards/margins": 0.0, + "rewards/rejected": -9.199907302856445, + "step": 156 + }, + { + "epoch": 1.6526315789473685, + "grad_norm": 5.56485565539333e-06, + "learning_rate": 0.0001968, + "logits/chosen": -1.4966881275177002, + "logits/rejected": -1.4966881275177002, + "logps/chosen": -126.2377700805664, + "logps/rejected": -126.2377700805664, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -9.70547866821289, + "rewards/margins": 0.0, + "rewards/rejected": -9.70547866821289, + "step": 157 + }, + { + "epoch": 1.663157894736842, + "grad_norm": 6.466461854870431e-06, + "learning_rate": 0.00019677894736842106, + "logits/chosen": -1.487974762916565, + "logits/rejected": -1.487974762916565, + "logps/chosen": -132.53555297851562, + "logps/rejected": -132.53555297851562, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -10.335257530212402, + "rewards/margins": 0.0, + "rewards/rejected": -10.335257530212402, + "step": 158 + }, + { + "epoch": 1.6736842105263157, + "grad_norm": 3.8276161831163336e-06, + "learning_rate": 0.0001967578947368421, + "logits/chosen": -1.6804078817367554, + "logits/rejected": -1.6804078817367554, + "logps/chosen": -91.00701904296875, + "logps/rejected": -91.00701904296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.319119453430176, + "rewards/margins": 0.0, + "rewards/rejected": -6.319119453430176, + "step": 159 + }, + { + "epoch": 1.6842105263157894, + "grad_norm": 1.2967062502866611e-05, + "learning_rate": 0.0001967368421052632, + "logits/chosen": -1.1012163162231445, + "logits/rejected": -1.1012163162231445, + "logps/chosen": -182.14588928222656, + "logps/rejected": -182.14588928222656, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -15.38726806640625, + "rewards/margins": 0.0, + "rewards/rejected": -15.38726806640625, + "step": 160 + }, + { + "epoch": 1.694736842105263, + "grad_norm": 7.505981557187624e-06, + "learning_rate": 0.0001967157894736842, + "logits/chosen": -1.452441692352295, + "logits/rejected": -1.452441692352295, + "logps/chosen": -153.8447265625, + "logps/rejected": -153.8447265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -12.466174125671387, + "rewards/margins": 0.0, + "rewards/rejected": -12.466174125671387, + "step": 161 + }, + { + "epoch": 1.7052631578947368, + "grad_norm": 1.2408400834829081e-05, + "learning_rate": 0.00019669473684210526, + "logits/chosen": -1.0768696069717407, + "logits/rejected": -1.0768696069717407, + "logps/chosen": -195.4624786376953, + "logps/rejected": -195.4624786376953, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -16.71892738342285, + "rewards/margins": 0.0, + "rewards/rejected": -16.71892738342285, + "step": 162 + }, + { + "epoch": 1.7157894736842105, + "grad_norm": 1.3855148608854506e-05, + "learning_rate": 0.0001966736842105263, + "logits/chosen": -1.061508059501648, + "logits/rejected": -1.061508059501648, + "logps/chosen": -203.4642333984375, + "logps/rejected": -203.4642333984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -17.519102096557617, + "rewards/margins": 0.0, + "rewards/rejected": -17.519102096557617, + "step": 163 + }, + { + "epoch": 1.7263157894736842, + "grad_norm": 5.5583345783816185e-06, + "learning_rate": 0.00019665263157894739, + "logits/chosen": -1.4160618782043457, + "logits/rejected": -1.4160618782043457, + "logps/chosen": -187.67039489746094, + "logps/rejected": -187.67039489746094, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -15.897933006286621, + "rewards/margins": 0.0, + "rewards/rejected": -15.897933006286621, + "step": 164 + }, + { + "epoch": 1.736842105263158, + "grad_norm": 9.477433195570484e-06, + "learning_rate": 0.00019663157894736843, + "logits/chosen": -1.9752657413482666, + "logits/rejected": -1.9752657413482666, + "logps/chosen": -128.8132781982422, + "logps/rejected": -128.8132781982422, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -10.155132293701172, + "rewards/margins": 0.0, + "rewards/rejected": -10.155132293701172, + "step": 165 + }, + { + "epoch": 1.7473684210526317, + "grad_norm": 1.932295890583191e-05, + "learning_rate": 0.00019661052631578948, + "logits/chosen": -1.5955842733383179, + "logits/rejected": -1.5955842733383179, + "logps/chosen": -228.4840850830078, + "logps/rejected": -228.4840850830078, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -19.663528442382812, + "rewards/margins": 0.0, + "rewards/rejected": -19.663528442382812, + "step": 166 + }, + { + "epoch": 1.7578947368421054, + "grad_norm": 1.5974874258972704e-05, + "learning_rate": 0.00019658947368421053, + "logits/chosen": -1.0810773372650146, + "logits/rejected": -1.0810773372650146, + "logps/chosen": -252.7413787841797, + "logps/rejected": -252.7413787841797, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -22.335704803466797, + "rewards/margins": 0.0, + "rewards/rejected": -22.335704803466797, + "step": 167 + }, + { + "epoch": 1.768421052631579, + "grad_norm": 9.09858317754697e-06, + "learning_rate": 0.00019656842105263158, + "logits/chosen": -1.4648233652114868, + "logits/rejected": -1.4648233652114868, + "logps/chosen": -198.26095581054688, + "logps/rejected": -198.26095581054688, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -16.800216674804688, + "rewards/margins": 0.0, + "rewards/rejected": -16.800216674804688, + "step": 168 + }, + { + "epoch": 1.7789473684210526, + "grad_norm": 1.4740438928129151e-05, + "learning_rate": 0.00019654736842105263, + "logits/chosen": -1.0363273620605469, + "logits/rejected": -1.0363273620605469, + "logps/chosen": -286.6956787109375, + "logps/rejected": -286.6956787109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -25.8223819732666, + "rewards/margins": 0.0, + "rewards/rejected": -25.8223819732666, + "step": 169 + }, + { + "epoch": 1.7894736842105263, + "grad_norm": 1.0695047421904746e-05, + "learning_rate": 0.00019652631578947368, + "logits/chosen": -0.8622230887413025, + "logits/rejected": -0.8622230887413025, + "logps/chosen": -252.5042266845703, + "logps/rejected": -252.5042266845703, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -22.4231014251709, + "rewards/margins": 0.0, + "rewards/rejected": -22.4231014251709, + "step": 170 + }, + { + "epoch": 1.8, + "grad_norm": 8.539610462321434e-06, + "learning_rate": 0.00019650526315789476, + "logits/chosen": -1.2167575359344482, + "logits/rejected": -1.2167575359344482, + "logps/chosen": -221.032958984375, + "logps/rejected": -221.032958984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -19.077417373657227, + "rewards/margins": 0.0, + "rewards/rejected": -19.077417373657227, + "step": 171 + }, + { + "epoch": 1.8105263157894735, + "grad_norm": 1.6939706256380305e-05, + "learning_rate": 0.0001964842105263158, + "logits/chosen": -0.8433440923690796, + "logits/rejected": -0.8433440923690796, + "logps/chosen": -298.406982421875, + "logps/rejected": -298.406982421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -26.902265548706055, + "rewards/margins": 0.0, + "rewards/rejected": -26.902265548706055, + "step": 172 + }, + { + "epoch": 1.8210526315789473, + "grad_norm": 7.195099442469655e-06, + "learning_rate": 0.00019646315789473686, + "logits/chosen": -1.062929630279541, + "logits/rejected": -1.062929630279541, + "logps/chosen": -232.75599670410156, + "logps/rejected": -232.75599670410156, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -20.24972152709961, + "rewards/margins": 0.0, + "rewards/rejected": -20.24972152709961, + "step": 173 + }, + { + "epoch": 1.831578947368421, + "grad_norm": 7.192861630755942e-06, + "learning_rate": 0.0001964421052631579, + "logits/chosen": -1.0091142654418945, + "logits/rejected": -1.0091142654418945, + "logps/chosen": -240.50201416015625, + "logps/rejected": -240.50201416015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -21.024322509765625, + "rewards/margins": 0.0, + "rewards/rejected": -21.024322509765625, + "step": 174 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 1.6050831618485972e-05, + "learning_rate": 0.00019642105263157895, + "logits/chosen": -0.6946796774864197, + "logits/rejected": -0.6946796774864197, + "logps/chosen": -343.2923889160156, + "logps/rejected": -343.2923889160156, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -31.390806198120117, + "rewards/margins": 0.0, + "rewards/rejected": -31.390806198120117, + "step": 175 + }, + { + "epoch": 1.8526315789473684, + "grad_norm": 1.0542914424149785e-05, + "learning_rate": 0.0001964, + "logits/chosen": -0.6068615317344666, + "logits/rejected": -0.6068615317344666, + "logps/chosen": -293.0101623535156, + "logps/rejected": -293.0101623535156, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -26.47369384765625, + "rewards/margins": 0.0, + "rewards/rejected": -26.47369384765625, + "step": 176 + }, + { + "epoch": 1.8631578947368421, + "grad_norm": 6.1548585108539555e-06, + "learning_rate": 0.00019637894736842105, + "logits/chosen": -0.8551391363143921, + "logits/rejected": -0.8551391363143921, + "logps/chosen": -266.0399169921875, + "logps/rejected": -266.0399169921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -23.578113555908203, + "rewards/margins": 0.0, + "rewards/rejected": -23.578113555908203, + "step": 177 + }, + { + "epoch": 1.8736842105263158, + "grad_norm": 1.620031071070116e-05, + "learning_rate": 0.00019635789473684213, + "logits/chosen": -0.5634011626243591, + "logits/rejected": -0.5634011626243591, + "logps/chosen": -378.9129943847656, + "logps/rejected": -378.9129943847656, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -35.04411315917969, + "rewards/margins": 0.0, + "rewards/rejected": -35.04411315917969, + "step": 178 + }, + { + "epoch": 1.8842105263157896, + "grad_norm": 1.1069708307331894e-05, + "learning_rate": 0.00019633684210526318, + "logits/chosen": -0.471484899520874, + "logits/rejected": -0.471484899520874, + "logps/chosen": -324.33038330078125, + "logps/rejected": -324.33038330078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -29.605716705322266, + "rewards/margins": 0.0, + "rewards/rejected": -29.605716705322266, + "step": 179 + }, + { + "epoch": 1.8947368421052633, + "grad_norm": 1.13854057417484e-05, + "learning_rate": 0.00019631578947368423, + "logits/chosen": -0.4289996027946472, + "logits/rejected": -0.4289996027946472, + "logps/chosen": -338.95654296875, + "logps/rejected": -338.95654296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -31.06833267211914, + "rewards/margins": 0.0, + "rewards/rejected": -31.06833267211914, + "step": 180 + }, + { + "epoch": 1.905263157894737, + "grad_norm": 7.206224836409092e-06, + "learning_rate": 0.00019629473684210528, + "logits/chosen": -0.637728214263916, + "logits/rejected": -0.637728214263916, + "logps/chosen": -293.6654052734375, + "logps/rejected": -293.6654052734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -26.340662002563477, + "rewards/margins": 0.0, + "rewards/rejected": -26.340662002563477, + "step": 181 + }, + { + "epoch": 1.9157894736842105, + "grad_norm": 7.431865924445447e-06, + "learning_rate": 0.00019627368421052633, + "logits/chosen": -0.798254132270813, + "logits/rejected": -0.798254132270813, + "logps/chosen": -211.24868774414062, + "logps/rejected": -211.24868774414062, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -18.343286514282227, + "rewards/margins": 0.0, + "rewards/rejected": -18.343286514282227, + "step": 182 + }, + { + "epoch": 1.9263157894736842, + "grad_norm": 1.6567742932238616e-05, + "learning_rate": 0.00019625263157894738, + "logits/chosen": -0.25748512148857117, + "logits/rejected": -0.25748512148857117, + "logps/chosen": -448.50189208984375, + "logps/rejected": -448.50189208984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -42.00300216674805, + "rewards/margins": 0.0, + "rewards/rejected": -42.00300216674805, + "step": 183 + }, + { + "epoch": 1.936842105263158, + "grad_norm": 1.677876389294397e-05, + "learning_rate": 0.00019623157894736842, + "logits/chosen": -0.15099215507507324, + "logits/rejected": -0.15099215507507324, + "logps/chosen": -471.9569091796875, + "logps/rejected": -471.9569091796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -44.348506927490234, + "rewards/margins": 0.0, + "rewards/rejected": -44.348506927490234, + "step": 184 + }, + { + "epoch": 1.9473684210526314, + "grad_norm": 8.614895705250092e-06, + "learning_rate": 0.0001962105263157895, + "logits/chosen": -0.37013179063796997, + "logits/rejected": -0.37013179063796997, + "logps/chosen": -238.4066162109375, + "logps/rejected": -238.4066162109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -21.11446762084961, + "rewards/margins": 0.0, + "rewards/rejected": -21.11446762084961, + "step": 185 + }, + { + "epoch": 1.9578947368421051, + "grad_norm": 3.001785989908967e-05, + "learning_rate": 0.00019618947368421055, + "logits/chosen": 0.251768559217453, + "logits/rejected": 0.251768559217453, + "logps/chosen": -510.8426208496094, + "logps/rejected": -510.8426208496094, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -48.14582824707031, + "rewards/margins": 0.0, + "rewards/rejected": -48.14582824707031, + "step": 186 + }, + { + "epoch": 1.9684210526315788, + "grad_norm": 2.127635343640577e-05, + "learning_rate": 0.00019616842105263157, + "logits/chosen": -0.1367361694574356, + "logits/rejected": -0.1367361694574356, + "logps/chosen": -355.6160583496094, + "logps/rejected": -355.6160583496094, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -32.76424026489258, + "rewards/margins": 0.0, + "rewards/rejected": -32.76424026489258, + "step": 187 + }, + { + "epoch": 1.9789473684210526, + "grad_norm": 2.330858842469752e-05, + "learning_rate": 0.00019614736842105262, + "logits/chosen": 0.042760226875543594, + "logits/rejected": 0.042760226875543594, + "logps/chosen": -382.27593994140625, + "logps/rejected": -382.27593994140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -35.430233001708984, + "rewards/margins": 0.0, + "rewards/rejected": -35.430233001708984, + "step": 188 + }, + { + "epoch": 1.9894736842105263, + "grad_norm": 1.5866742614889517e-05, + "learning_rate": 0.0001961263157894737, + "logits/chosen": 0.2904311418533325, + "logits/rejected": 0.2904311418533325, + "logps/chosen": -351.369873046875, + "logps/rejected": -351.369873046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -32.267879486083984, + "rewards/margins": 0.0, + "rewards/rejected": -32.267879486083984, + "step": 189 + }, + { + "epoch": 2.0, + "grad_norm": 1.4786298379476648e-05, + "learning_rate": 0.00019610526315789475, + "logits/chosen": 0.43049368262290955, + "logits/rejected": 0.43049368262290955, + "logps/chosen": -276.5857238769531, + "logps/rejected": -276.5857238769531, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -24.876989364624023, + "rewards/margins": 0.0, + "rewards/rejected": -24.876989364624023, + "step": 190 + }, + { + "epoch": 2.0105263157894737, + "grad_norm": NaN, + "learning_rate": 0.00019610526315789475, + "logits/chosen": 1.0581265687942505, + "logits/rejected": 1.0581265687942505, + "logps/chosen": -421.4101867675781, + "logps/rejected": -421.4101867675781, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -38.95614242553711, + "rewards/margins": 0.0, + "rewards/rejected": -38.95614242553711, + "step": 191 + }, + { + "epoch": 2.0210526315789474, + "grad_norm": NaN, + "learning_rate": 0.00019610526315789475, + "logits/chosen": 2.1765575408935547, + "logits/rejected": 2.1765575408935547, + "logps/chosen": -624.4765625, + "logps/rejected": -624.4765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -59.50922393798828, + "rewards/margins": 0.0, + "rewards/rejected": -59.50922393798828, + "step": 192 + }, + { + "epoch": 2.031578947368421, + "grad_norm": 1.6681724446243607e-05, + "learning_rate": 0.0001960842105263158, + "logits/chosen": 0.8768024444580078, + "logits/rejected": 0.8768024444580078, + "logps/chosen": -303.2098388671875, + "logps/rejected": -303.2098388671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -27.53940200805664, + "rewards/margins": 0.0, + "rewards/rejected": -27.53940200805664, + "step": 193 + }, + { + "epoch": 2.042105263157895, + "grad_norm": 2.653021874721162e-05, + "learning_rate": 0.00019606315789473687, + "logits/chosen": 2.153998851776123, + "logits/rejected": 2.153998851776123, + "logps/chosen": -414.113037109375, + "logps/rejected": -414.113037109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -38.49300765991211, + "rewards/margins": 0.0, + "rewards/rejected": -38.49300765991211, + "step": 194 + }, + { + "epoch": 2.0526315789473686, + "grad_norm": 5.0161706894868985e-05, + "learning_rate": 0.0001960421052631579, + "logits/chosen": 2.193232297897339, + "logits/rejected": 2.193232297897339, + "logps/chosen": -481.1329345703125, + "logps/rejected": -481.1329345703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -44.92841720581055, + "rewards/margins": 0.0, + "rewards/rejected": -44.92841720581055, + "step": 195 + }, + { + "epoch": 2.0631578947368423, + "grad_norm": 4.694354356615804e-05, + "learning_rate": 0.00019602105263157894, + "logits/chosen": 3.333601474761963, + "logits/rejected": 3.333601474761963, + "logps/chosen": -658.974853515625, + "logps/rejected": -658.974853515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -62.95905303955078, + "rewards/margins": 0.0, + "rewards/rejected": -62.95905303955078, + "step": 196 + }, + { + "epoch": 2.0736842105263156, + "grad_norm": 3.98053161916323e-05, + "learning_rate": 0.000196, + "logits/chosen": 3.93233060836792, + "logits/rejected": 3.93233060836792, + "logps/chosen": -516.2655029296875, + "logps/rejected": -516.2655029296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -48.757442474365234, + "rewards/margins": 0.0, + "rewards/rejected": -48.757442474365234, + "step": 197 + }, + { + "epoch": 2.0842105263157893, + "grad_norm": NaN, + "learning_rate": 0.000196, + "logits/chosen": 4.583979606628418, + "logits/rejected": 4.583979606628418, + "logps/chosen": -566.2434692382812, + "logps/rejected": -566.2434692382812, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -53.797027587890625, + "rewards/margins": 0.0, + "rewards/rejected": -53.797027587890625, + "step": 198 + }, + { + "epoch": 2.094736842105263, + "grad_norm": 4.385167267173529e-05, + "learning_rate": 0.00019597894736842107, + "logits/chosen": 4.913366794586182, + "logits/rejected": 4.913366794586182, + "logps/chosen": -737.3299560546875, + "logps/rejected": -737.3299560546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -70.88581085205078, + "rewards/margins": 0.0, + "rewards/rejected": -70.88581085205078, + "step": 199 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 4.879558764514513e-05, + "learning_rate": 0.00019595789473684212, + "logits/chosen": 5.830333232879639, + "logits/rejected": 5.830333232879639, + "logps/chosen": -820.52880859375, + "logps/rejected": -820.52880859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -79.20569610595703, + "rewards/margins": 0.0, + "rewards/rejected": -79.20569610595703, + "step": 200 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": 6.817009925842285, + "eval_logits/rejected": 6.817009925842285, + "eval_logps/chosen": -809.7173461914062, + "eval_logps/rejected": -809.7173461914062, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -78.06855773925781, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -78.06855773925781, + "eval_runtime": 4.5433, + "eval_samples_per_second": 2.201, + "eval_steps_per_second": 2.201, + "step": 200 + }, + { + "epoch": 2.1157894736842104, + "grad_norm": 2.9046863346593454e-05, + "learning_rate": 0.00019593684210526317, + "logits/chosen": 6.449033737182617, + "logits/rejected": 6.449033737182617, + "logps/chosen": -673.6071166992188, + "logps/rejected": -673.6071166992188, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -64.49160766601562, + "rewards/margins": 0.0, + "rewards/rejected": -64.49160766601562, + "step": 201 + }, + { + "epoch": 2.126315789473684, + "grad_norm": 5.5444274039473385e-05, + "learning_rate": 0.00019591578947368422, + "logits/chosen": 8.087075233459473, + "logits/rejected": 8.087075233459473, + "logps/chosen": -1076.608642578125, + "logps/rejected": -1076.608642578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -104.7224349975586, + "rewards/margins": 0.0, + "rewards/rejected": -104.7224349975586, + "step": 202 + }, + { + "epoch": 2.136842105263158, + "grad_norm": 3.140119224553928e-05, + "learning_rate": 0.00019589473684210527, + "logits/chosen": 8.09109878540039, + "logits/rejected": 8.09109878540039, + "logps/chosen": -531.2365112304688, + "logps/rejected": -531.2365112304688, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -50.34206771850586, + "rewards/margins": 0.0, + "rewards/rejected": -50.34206771850586, + "step": 203 + }, + { + "epoch": 2.1473684210526316, + "grad_norm": 5.327671897248365e-05, + "learning_rate": 0.00019587368421052632, + "logits/chosen": 9.973334312438965, + "logits/rejected": 9.973334312438965, + "logps/chosen": -1248.2523193359375, + "logps/rejected": -1248.2523193359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -121.88679504394531, + "rewards/margins": 0.0, + "rewards/rejected": -121.88679504394531, + "step": 204 + }, + { + "epoch": 2.1578947368421053, + "grad_norm": 1.3940511053078808e-05, + "learning_rate": 0.00019585263157894737, + "logits/chosen": 9.696736335754395, + "logits/rejected": 9.696736335754395, + "logps/chosen": -613.7274780273438, + "logps/rejected": -613.7274780273438, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -58.591163635253906, + "rewards/margins": 0.0, + "rewards/rejected": -58.591163635253906, + "step": 205 + }, + { + "epoch": 2.168421052631579, + "grad_norm": 2.2086864191805944e-05, + "learning_rate": 0.00019583157894736844, + "logits/chosen": 11.36934757232666, + "logits/rejected": 11.36934757232666, + "logps/chosen": -916.5037841796875, + "logps/rejected": -916.5037841796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -88.62450408935547, + "rewards/margins": 0.0, + "rewards/rejected": -88.62450408935547, + "step": 206 + }, + { + "epoch": 2.1789473684210527, + "grad_norm": 1.9237877495470457e-05, + "learning_rate": 0.0001958105263157895, + "logits/chosen": 11.874436378479004, + "logits/rejected": 11.874436378479004, + "logps/chosen": -958.419921875, + "logps/rejected": -958.419921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -92.81611633300781, + "rewards/margins": 0.0, + "rewards/rejected": -92.81611633300781, + "step": 207 + }, + { + "epoch": 2.1894736842105265, + "grad_norm": 1.4395719517779071e-05, + "learning_rate": 0.00019578947368421054, + "logits/chosen": 12.384649276733398, + "logits/rejected": 12.384649276733398, + "logps/chosen": -1001.62060546875, + "logps/rejected": -1001.62060546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -97.13618469238281, + "rewards/margins": 0.0, + "rewards/rejected": -97.13618469238281, + "step": 208 + }, + { + "epoch": 2.2, + "grad_norm": 2.323817170690745e-05, + "learning_rate": 0.0001957684210526316, + "logits/chosen": 12.605050086975098, + "logits/rejected": 12.605050086975098, + "logps/chosen": -1179.7540283203125, + "logps/rejected": -1179.7540283203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -115.17803955078125, + "rewards/margins": 0.0, + "rewards/rejected": -115.17803955078125, + "step": 209 + }, + { + "epoch": 2.2105263157894735, + "grad_norm": 2.976310861413367e-05, + "learning_rate": 0.00019574736842105264, + "logits/chosen": 13.146588325500488, + "logits/rejected": 13.146588325500488, + "logps/chosen": -1539.37890625, + "logps/rejected": -1539.37890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -150.9994659423828, + "rewards/margins": 0.0, + "rewards/rejected": -150.9994659423828, + "step": 210 + }, + { + "epoch": 2.221052631578947, + "grad_norm": 2.592038072180003e-05, + "learning_rate": 0.0001957263157894737, + "logits/chosen": 13.31721305847168, + "logits/rejected": 13.31721305847168, + "logps/chosen": -1449.75341796875, + "logps/rejected": -1449.75341796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -142.12815856933594, + "rewards/margins": 0.0, + "rewards/rejected": -142.12815856933594, + "step": 211 + }, + { + "epoch": 2.231578947368421, + "grad_norm": 1.8985463611898012e-05, + "learning_rate": 0.00019570526315789474, + "logits/chosen": 14.342009544372559, + "logits/rejected": 14.342009544372559, + "logps/chosen": -1189.3717041015625, + "logps/rejected": -1189.3717041015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -116.10985565185547, + "rewards/margins": 0.0, + "rewards/rejected": -116.10985565185547, + "step": 212 + }, + { + "epoch": 2.2421052631578946, + "grad_norm": 8.969174814410508e-06, + "learning_rate": 0.00019568421052631581, + "logits/chosen": 14.59449577331543, + "logits/rejected": 14.59449577331543, + "logps/chosen": -963.65234375, + "logps/rejected": -963.65234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -93.6390380859375, + "rewards/margins": 0.0, + "rewards/rejected": -93.6390380859375, + "step": 213 + }, + { + "epoch": 2.2526315789473683, + "grad_norm": 8.754281225265004e-06, + "learning_rate": 0.00019566315789473686, + "logits/chosen": 15.000722885131836, + "logits/rejected": 15.000722885131836, + "logps/chosen": -996.5911254882812, + "logps/rejected": -996.5911254882812, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -96.93291473388672, + "rewards/margins": 0.0, + "rewards/rejected": -96.93291473388672, + "step": 214 + }, + { + "epoch": 2.263157894736842, + "grad_norm": 2.284335823787842e-05, + "learning_rate": 0.00019564210526315789, + "logits/chosen": 15.543832778930664, + "logits/rejected": 15.543832778930664, + "logps/chosen": -1338.6514892578125, + "logps/rejected": -1338.6514892578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -131.03782653808594, + "rewards/margins": 0.0, + "rewards/rejected": -131.03782653808594, + "step": 215 + }, + { + "epoch": 2.2736842105263158, + "grad_norm": 8.72291548148496e-06, + "learning_rate": 0.00019562105263157896, + "logits/chosen": 15.671738624572754, + "logits/rejected": 15.671738624572754, + "logps/chosen": -1070.574951171875, + "logps/rejected": -1070.574951171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -104.331298828125, + "rewards/margins": 0.0, + "rewards/rejected": -104.331298828125, + "step": 216 + }, + { + "epoch": 2.2842105263157895, + "grad_norm": 1.989948759728577e-05, + "learning_rate": 0.0001956, + "logits/chosen": 16.083484649658203, + "logits/rejected": 16.083484649658203, + "logps/chosen": -1369.2926025390625, + "logps/rejected": -1369.2926025390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -133.90338134765625, + "rewards/margins": 0.0, + "rewards/rejected": -133.90338134765625, + "step": 217 + }, + { + "epoch": 2.294736842105263, + "grad_norm": 1.5831390555831604e-05, + "learning_rate": 0.00019557894736842106, + "logits/chosen": 16.6643123626709, + "logits/rejected": 16.6643123626709, + "logps/chosen": -1440.6026611328125, + "logps/rejected": -1440.6026611328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -141.1419677734375, + "rewards/margins": 0.0, + "rewards/rejected": -141.1419677734375, + "step": 218 + }, + { + "epoch": 2.305263157894737, + "grad_norm": 3.188383925589733e-05, + "learning_rate": 0.0001955578947368421, + "logits/chosen": 16.663028717041016, + "logits/rejected": 16.663028717041016, + "logps/chosen": -2150.78564453125, + "logps/rejected": -2150.78564453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -212.14013671875, + "rewards/margins": 0.0, + "rewards/rejected": -212.14013671875, + "step": 219 + }, + { + "epoch": 2.3157894736842106, + "grad_norm": 2.217062865383923e-05, + "learning_rate": 0.00019553684210526319, + "logits/chosen": 17.02804183959961, + "logits/rejected": 17.02804183959961, + "logps/chosen": -1657.6380615234375, + "logps/rejected": -1657.6380615234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -162.8946990966797, + "rewards/margins": 0.0, + "rewards/rejected": -162.8946990966797, + "step": 220 + }, + { + "epoch": 2.3263157894736843, + "grad_norm": 1.8672544683795422e-05, + "learning_rate": 0.00019551578947368424, + "logits/chosen": 16.935508728027344, + "logits/rejected": 16.935508728027344, + "logps/chosen": -1385.3035888671875, + "logps/rejected": -1385.3035888671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -135.80416870117188, + "rewards/margins": 0.0, + "rewards/rejected": -135.80416870117188, + "step": 221 + }, + { + "epoch": 2.336842105263158, + "grad_norm": 1.8624623407959007e-05, + "learning_rate": 0.00019549473684210526, + "logits/chosen": 17.23211669921875, + "logits/rejected": 17.23211669921875, + "logps/chosen": -1825.585693359375, + "logps/rejected": -1825.585693359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -179.53269958496094, + "rewards/margins": 0.0, + "rewards/rejected": -179.53269958496094, + "step": 222 + }, + { + "epoch": 2.3473684210526318, + "grad_norm": 1.575539863551967e-05, + "learning_rate": 0.0001954736842105263, + "logits/chosen": 17.0297794342041, + "logits/rejected": 17.0297794342041, + "logps/chosen": -1393.029296875, + "logps/rejected": -1393.029296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -136.52134704589844, + "rewards/margins": 0.0, + "rewards/rejected": -136.52134704589844, + "step": 223 + }, + { + "epoch": 2.3578947368421055, + "grad_norm": 3.4856082493206486e-05, + "learning_rate": 0.00019545263157894738, + "logits/chosen": 17.499181747436523, + "logits/rejected": 17.499181747436523, + "logps/chosen": -2287.0009765625, + "logps/rejected": -2287.0009765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -225.87278747558594, + "rewards/margins": 0.0, + "rewards/rejected": -225.87278747558594, + "step": 224 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 5.088413308840245e-05, + "learning_rate": 0.00019543157894736843, + "logits/chosen": 17.674972534179688, + "logits/rejected": 17.674972534179688, + "logps/chosen": -2397.51611328125, + "logps/rejected": -2397.51611328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -236.8824920654297, + "rewards/margins": 0.0, + "rewards/rejected": -236.8824920654297, + "step": 225 + }, + { + "epoch": 2.3789473684210525, + "grad_norm": 8.878765947883949e-05, + "learning_rate": 0.00019541052631578948, + "logits/chosen": 17.203372955322266, + "logits/rejected": 17.203372955322266, + "logps/chosen": -3492.2802734375, + "logps/rejected": -3492.2802734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -346.28961181640625, + "rewards/margins": 0.0, + "rewards/rejected": -346.28961181640625, + "step": 226 + }, + { + "epoch": 2.389473684210526, + "grad_norm": 3.1237228540703654e-05, + "learning_rate": 0.00019538947368421056, + "logits/chosen": 17.443052291870117, + "logits/rejected": 17.443052291870117, + "logps/chosen": -2894.005859375, + "logps/rejected": -2894.005859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -286.5732727050781, + "rewards/margins": 0.0, + "rewards/rejected": -286.5732727050781, + "step": 227 + }, + { + "epoch": 2.4, + "grad_norm": 4.1664425225462765e-05, + "learning_rate": 0.00019536842105263158, + "logits/chosen": 17.173011779785156, + "logits/rejected": 17.173011779785156, + "logps/chosen": -3830.0087890625, + "logps/rejected": -3830.0087890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -380.1536865234375, + "rewards/margins": 0.0, + "rewards/rejected": -380.1536865234375, + "step": 228 + }, + { + "epoch": 2.4105263157894736, + "grad_norm": 1.3555698387790471e-05, + "learning_rate": 0.00019534736842105263, + "logits/chosen": 16.77044677734375, + "logits/rejected": 16.77044677734375, + "logps/chosen": -2450.82861328125, + "logps/rejected": -2450.82861328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -242.35667419433594, + "rewards/margins": 0.0, + "rewards/rejected": -242.35667419433594, + "step": 229 + }, + { + "epoch": 2.4210526315789473, + "grad_norm": 1.865607737272512e-05, + "learning_rate": 0.00019532631578947368, + "logits/chosen": 16.351192474365234, + "logits/rejected": 16.351192474365234, + "logps/chosen": -3230.26806640625, + "logps/rejected": -3230.26806640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -320.1084899902344, + "rewards/margins": 0.0, + "rewards/rejected": -320.1084899902344, + "step": 230 + }, + { + "epoch": 2.431578947368421, + "grad_norm": 2.954201954707969e-05, + "learning_rate": 0.00019530526315789475, + "logits/chosen": 15.748371124267578, + "logits/rejected": 15.748371124267578, + "logps/chosen": -4338.77783203125, + "logps/rejected": -4338.77783203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -431.0306091308594, + "rewards/margins": 0.0, + "rewards/rejected": -431.0306091308594, + "step": 231 + }, + { + "epoch": 2.442105263157895, + "grad_norm": 1.0253191248921212e-05, + "learning_rate": 0.0001952842105263158, + "logits/chosen": 15.156774520874023, + "logits/rejected": 15.156774520874023, + "logps/chosen": -2693.63916015625, + "logps/rejected": -2693.63916015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -266.6377258300781, + "rewards/margins": 0.0, + "rewards/rejected": -266.6377258300781, + "step": 232 + }, + { + "epoch": 2.4526315789473685, + "grad_norm": 7.435526640620083e-06, + "learning_rate": 0.00019526315789473685, + "logits/chosen": 14.565388679504395, + "logits/rejected": 14.565388679504395, + "logps/chosen": -2461.76708984375, + "logps/rejected": -2461.76708984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -243.39512634277344, + "rewards/margins": 0.0, + "rewards/rejected": -243.39512634277344, + "step": 233 + }, + { + "epoch": 2.463157894736842, + "grad_norm": 1.767127469065599e-05, + "learning_rate": 0.0001952421052631579, + "logits/chosen": 14.204218864440918, + "logits/rejected": 14.204218864440918, + "logps/chosen": -4046.7734375, + "logps/rejected": -4046.7734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -401.4924621582031, + "rewards/margins": 0.0, + "rewards/rejected": -401.4924621582031, + "step": 234 + }, + { + "epoch": 2.473684210526316, + "grad_norm": 1.1389983228582423e-05, + "learning_rate": 0.00019522105263157895, + "logits/chosen": 13.95106315612793, + "logits/rejected": 13.95106315612793, + "logps/chosen": -3572.88134765625, + "logps/rejected": -3572.88134765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -354.4190368652344, + "rewards/margins": 0.0, + "rewards/rejected": -354.4190368652344, + "step": 235 + }, + { + "epoch": 2.4842105263157896, + "grad_norm": 1.084066661860561e-05, + "learning_rate": 0.0001952, + "logits/chosen": 13.596949577331543, + "logits/rejected": 13.596949577331543, + "logps/chosen": -3798.3984375, + "logps/rejected": -3798.3984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -377.0125427246094, + "rewards/margins": 0.0, + "rewards/rejected": -377.0125427246094, + "step": 236 + }, + { + "epoch": 2.4947368421052634, + "grad_norm": 1.549422086100094e-05, + "learning_rate": 0.00019517894736842105, + "logits/chosen": 13.390946388244629, + "logits/rejected": 13.390946388244629, + "logps/chosen": -4671.4033203125, + "logps/rejected": -4671.4033203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -464.2931823730469, + "rewards/margins": 0.0, + "rewards/rejected": -464.2931823730469, + "step": 237 + }, + { + "epoch": 2.5052631578947366, + "grad_norm": 9.202953151543625e-06, + "learning_rate": 0.00019515789473684213, + "logits/chosen": 12.943868637084961, + "logits/rejected": 12.943868637084961, + "logps/chosen": -3399.74951171875, + "logps/rejected": -3399.74951171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -336.9490661621094, + "rewards/margins": 0.0, + "rewards/rejected": -336.9490661621094, + "step": 238 + }, + { + "epoch": 2.515789473684211, + "grad_norm": 1.901045470731333e-05, + "learning_rate": 0.00019513684210526318, + "logits/chosen": 12.912806510925293, + "logits/rejected": 12.912806510925293, + "logps/chosen": -4980.595703125, + "logps/rejected": -4980.595703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -495.12115478515625, + "rewards/margins": 0.0, + "rewards/rejected": -495.12115478515625, + "step": 239 + }, + { + "epoch": 2.526315789473684, + "grad_norm": 4.548305696516763e-06, + "learning_rate": 0.00019511578947368423, + "logits/chosen": 12.52194595336914, + "logits/rejected": 12.52194595336914, + "logps/chosen": -2582.576904296875, + "logps/rejected": -2582.576904296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -255.47610473632812, + "rewards/margins": 0.0, + "rewards/rejected": -255.47610473632812, + "step": 240 + }, + { + "epoch": 2.536842105263158, + "grad_norm": 7.886830644565634e-06, + "learning_rate": 0.00019509473684210527, + "logits/chosen": 12.501900672912598, + "logits/rejected": 12.501900672912598, + "logps/chosen": -3638.797607421875, + "logps/rejected": -3638.797607421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -360.96148681640625, + "rewards/margins": 0.0, + "rewards/rejected": -360.96148681640625, + "step": 241 + }, + { + "epoch": 2.5473684210526315, + "grad_norm": 1.6854215573403053e-05, + "learning_rate": 0.00019507368421052632, + "logits/chosen": 12.465921401977539, + "logits/rejected": 12.465921401977539, + "logps/chosen": -5014.8525390625, + "logps/rejected": -5014.8525390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -498.5468444824219, + "rewards/margins": 0.0, + "rewards/rejected": -498.5468444824219, + "step": 242 + }, + { + "epoch": 2.557894736842105, + "grad_norm": 7.458407708327286e-06, + "learning_rate": 0.00019505263157894737, + "logits/chosen": 12.224055290222168, + "logits/rejected": 12.224055290222168, + "logps/chosen": -3439.12646484375, + "logps/rejected": -3439.12646484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -340.88677978515625, + "rewards/margins": 0.0, + "rewards/rejected": -340.88677978515625, + "step": 243 + }, + { + "epoch": 2.568421052631579, + "grad_norm": 1.0492250112292822e-05, + "learning_rate": 0.00019503157894736842, + "logits/chosen": 12.271549224853516, + "logits/rejected": 12.271549224853516, + "logps/chosen": -4212.18310546875, + "logps/rejected": -4212.18310546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -418.033447265625, + "rewards/margins": 0.0, + "rewards/rejected": -418.033447265625, + "step": 244 + }, + { + "epoch": 2.5789473684210527, + "grad_norm": 9.04471380636096e-06, + "learning_rate": 0.0001950105263157895, + "logits/chosen": 12.143754005432129, + "logits/rejected": 12.143754005432129, + "logps/chosen": -3889.806640625, + "logps/rejected": -3889.806640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -386.1533508300781, + "rewards/margins": 0.0, + "rewards/rejected": -386.1533508300781, + "step": 245 + }, + { + "epoch": 2.5894736842105264, + "grad_norm": 1.883074946817942e-05, + "learning_rate": 0.00019498947368421055, + "logits/chosen": 12.154573440551758, + "logits/rejected": 12.154573440551758, + "logps/chosen": -5032.49169921875, + "logps/rejected": -5032.49169921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -500.3107604980469, + "rewards/margins": 0.0, + "rewards/rejected": -500.3107604980469, + "step": 246 + }, + { + "epoch": 2.6, + "grad_norm": 1.1391132829885464e-05, + "learning_rate": 0.00019496842105263157, + "logits/chosen": 11.987241744995117, + "logits/rejected": 11.987241744995117, + "logps/chosen": -3883.806640625, + "logps/rejected": -3883.806640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -385.5533447265625, + "rewards/margins": 0.0, + "rewards/rejected": -385.5533447265625, + "step": 247 + }, + { + "epoch": 2.610526315789474, + "grad_norm": 1.7160728020826355e-05, + "learning_rate": 0.00019494736842105265, + "logits/chosen": 11.933201789855957, + "logits/rejected": 11.933201789855957, + "logps/chosen": -4153.123046875, + "logps/rejected": -4153.123046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -412.51495361328125, + "rewards/margins": 0.0, + "rewards/rejected": -412.51495361328125, + "step": 248 + }, + { + "epoch": 2.6210526315789475, + "grad_norm": 2.486378616595175e-05, + "learning_rate": 0.0001949263157894737, + "logits/chosen": 11.837088584899902, + "logits/rejected": 11.837088584899902, + "logps/chosen": -4702.62109375, + "logps/rejected": -4702.62109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -467.4149475097656, + "rewards/margins": 0.0, + "rewards/rejected": -467.4149475097656, + "step": 249 + }, + { + "epoch": 2.6315789473684212, + "grad_norm": 1.9884004359482788e-05, + "learning_rate": 0.00019490526315789475, + "logits/chosen": 11.529038429260254, + "logits/rejected": 11.529038429260254, + "logps/chosen": -3380.365234375, + "logps/rejected": -3380.365234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -335.0106506347656, + "rewards/margins": 0.0, + "rewards/rejected": -335.0106506347656, + "step": 250 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": 11.380880355834961, + "eval_logits/rejected": 11.380880355834961, + "eval_logps/chosen": -4068.390625, + "eval_logps/rejected": -4068.390625, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -403.9358825683594, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -403.9358825683594, + "eval_runtime": 4.5019, + "eval_samples_per_second": 2.221, + "eval_steps_per_second": 2.221, + "step": 250 + }, + { + "epoch": 2.6421052631578945, + "grad_norm": 1.5210554920486175e-05, + "learning_rate": 0.0001948842105263158, + "logits/chosen": 11.246644020080566, + "logits/rejected": 11.246644020080566, + "logps/chosen": -2526.46533203125, + "logps/rejected": -2526.46533203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -249.8649444580078, + "rewards/margins": 0.0, + "rewards/rejected": -249.8649444580078, + "step": 251 + }, + { + "epoch": 2.6526315789473687, + "grad_norm": 4.103879109607078e-05, + "learning_rate": 0.00019486315789473687, + "logits/chosen": 11.121448516845703, + "logits/rejected": 11.121448516845703, + "logps/chosen": -3731.0234375, + "logps/rejected": -3731.0234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -370.2750244140625, + "rewards/margins": 0.0, + "rewards/rejected": -370.2750244140625, + "step": 252 + }, + { + "epoch": 2.663157894736842, + "grad_norm": 2.5248224119422957e-05, + "learning_rate": 0.00019484210526315792, + "logits/chosen": 10.745014190673828, + "logits/rejected": 10.745014190673828, + "logps/chosen": -2451.896484375, + "logps/rejected": -2451.896484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -242.40806579589844, + "rewards/margins": 0.0, + "rewards/rejected": -242.40806579589844, + "step": 253 + }, + { + "epoch": 2.6736842105263157, + "grad_norm": 4.5085376768838614e-05, + "learning_rate": 0.00019482105263157894, + "logits/chosen": 10.564476013183594, + "logits/rejected": 10.564476013183594, + "logps/chosen": -3395.93359375, + "logps/rejected": -3395.93359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -336.675048828125, + "rewards/margins": 0.0, + "rewards/rejected": -336.675048828125, + "step": 254 + }, + { + "epoch": 2.6842105263157894, + "grad_norm": 6.854850653326139e-05, + "learning_rate": 0.0001948, + "logits/chosen": 10.420607566833496, + "logits/rejected": 10.420607566833496, + "logps/chosen": -3818.4208984375, + "logps/rejected": -3818.4208984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -379.04473876953125, + "rewards/margins": 0.0, + "rewards/rejected": -379.04473876953125, + "step": 255 + }, + { + "epoch": 2.694736842105263, + "grad_norm": 7.055894093355164e-05, + "learning_rate": 0.00019477894736842107, + "logits/chosen": 9.988139152526855, + "logits/rejected": 9.988139152526855, + "logps/chosen": -3482.1787109375, + "logps/rejected": -3482.1787109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -345.39056396484375, + "rewards/margins": 0.0, + "rewards/rejected": -345.39056396484375, + "step": 256 + }, + { + "epoch": 2.705263157894737, + "grad_norm": 3.460259540588595e-05, + "learning_rate": 0.00019475789473684212, + "logits/chosen": 9.430808067321777, + "logits/rejected": 9.430808067321777, + "logps/chosen": -2254.797607421875, + "logps/rejected": -2254.797607421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -222.6981658935547, + "rewards/margins": 0.0, + "rewards/rejected": -222.6981658935547, + "step": 257 + }, + { + "epoch": 2.7157894736842105, + "grad_norm": 0.00010256427776766941, + "learning_rate": 0.00019473684210526317, + "logits/chosen": 9.299915313720703, + "logits/rejected": 9.299915313720703, + "logps/chosen": -4039.5791015625, + "logps/rejected": -4039.5791015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -401.1107177734375, + "rewards/margins": 0.0, + "rewards/rejected": -401.1107177734375, + "step": 258 + }, + { + "epoch": 2.7263157894736842, + "grad_norm": 9.223208326147869e-05, + "learning_rate": 0.00019471578947368422, + "logits/chosen": 8.70045280456543, + "logits/rejected": 8.70045280456543, + "logps/chosen": -3428.341552734375, + "logps/rejected": -3428.341552734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -339.6492614746094, + "rewards/margins": 0.0, + "rewards/rejected": -339.6492614746094, + "step": 259 + }, + { + "epoch": 2.736842105263158, + "grad_norm": 0.00010385946370661259, + "learning_rate": 0.00019469473684210527, + "logits/chosen": 8.262590408325195, + "logits/rejected": 8.262590408325195, + "logps/chosen": -3089.6640625, + "logps/rejected": -3089.6640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -306.13909912109375, + "rewards/margins": 0.0, + "rewards/rejected": -306.13909912109375, + "step": 260 + }, + { + "epoch": 2.7473684210526317, + "grad_norm": 9.678524656919762e-05, + "learning_rate": 0.00019467368421052631, + "logits/chosen": 7.670555114746094, + "logits/rejected": 7.670555114746094, + "logps/chosen": -2587.950439453125, + "logps/rejected": -2587.950439453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -255.7691650390625, + "rewards/margins": 0.0, + "rewards/rejected": -255.7691650390625, + "step": 261 + }, + { + "epoch": 2.7578947368421054, + "grad_norm": 9.062133176485077e-05, + "learning_rate": 0.00019465263157894736, + "logits/chosen": 7.307285785675049, + "logits/rejected": 7.307285785675049, + "logps/chosen": -2509.31005859375, + "logps/rejected": -2509.31005859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -247.90513610839844, + "rewards/margins": 0.0, + "rewards/rejected": -247.90513610839844, + "step": 262 + }, + { + "epoch": 2.768421052631579, + "grad_norm": 9.935448179021478e-05, + "learning_rate": 0.00019463157894736844, + "logits/chosen": 7.144592761993408, + "logits/rejected": 7.144592761993408, + "logps/chosen": -2820.29638671875, + "logps/rejected": -2820.29638671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -279.20233154296875, + "rewards/margins": 0.0, + "rewards/rejected": -279.20233154296875, + "step": 263 + }, + { + "epoch": 2.7789473684210524, + "grad_norm": 9.599085024092346e-05, + "learning_rate": 0.0001946105263157895, + "logits/chosen": 6.889897346496582, + "logits/rejected": 6.889897346496582, + "logps/chosen": -2589.03564453125, + "logps/rejected": -2589.03564453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -255.98526000976562, + "rewards/margins": 0.0, + "rewards/rejected": -255.98526000976562, + "step": 264 + }, + { + "epoch": 2.7894736842105265, + "grad_norm": 8.2246377132833e-05, + "learning_rate": 0.00019458947368421054, + "logits/chosen": 6.846752643585205, + "logits/rejected": 6.846752643585205, + "logps/chosen": -2410.0673828125, + "logps/rejected": -2410.0673828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -237.98086547851562, + "rewards/margins": 0.0, + "rewards/rejected": -237.98086547851562, + "step": 265 + }, + { + "epoch": 2.8, + "grad_norm": 0.0001246191532118246, + "learning_rate": 0.0001945684210526316, + "logits/chosen": 6.849774360656738, + "logits/rejected": 6.849774360656738, + "logps/chosen": -3313.88623046875, + "logps/rejected": -3313.88623046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -328.54144287109375, + "rewards/margins": 0.0, + "rewards/rejected": -328.54144287109375, + "step": 266 + }, + { + "epoch": 2.8105263157894735, + "grad_norm": 0.00011715733126038685, + "learning_rate": 0.00019454736842105264, + "logits/chosen": 6.692395210266113, + "logits/rejected": 6.692395210266113, + "logps/chosen": -3254.78662109375, + "logps/rejected": -3254.78662109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -322.6314697265625, + "rewards/margins": 0.0, + "rewards/rejected": -322.6314697265625, + "step": 267 + }, + { + "epoch": 2.8210526315789473, + "grad_norm": 5.87665599596221e-05, + "learning_rate": 0.00019452631578947369, + "logits/chosen": 6.446358680725098, + "logits/rejected": 6.446358680725098, + "logps/chosen": -2285.24609375, + "logps/rejected": -2285.24609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -225.4987335205078, + "rewards/margins": 0.0, + "rewards/rejected": -225.4987335205078, + "step": 268 + }, + { + "epoch": 2.831578947368421, + "grad_norm": 7.499523053411394e-05, + "learning_rate": 0.00019450526315789474, + "logits/chosen": 6.247101783752441, + "logits/rejected": 6.247101783752441, + "logps/chosen": -2381.52587890625, + "logps/rejected": -2381.52587890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -235.28347778320312, + "rewards/margins": 0.0, + "rewards/rejected": -235.28347778320312, + "step": 269 + }, + { + "epoch": 2.8421052631578947, + "grad_norm": 0.00012944728950969875, + "learning_rate": 0.0001944842105263158, + "logits/chosen": 6.291374206542969, + "logits/rejected": 6.291374206542969, + "logps/chosen": -3051.115234375, + "logps/rejected": -3051.115234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -302.26434326171875, + "rewards/margins": 0.0, + "rewards/rejected": -302.26434326171875, + "step": 270 + }, + { + "epoch": 2.8526315789473684, + "grad_norm": 6.670731090707704e-05, + "learning_rate": 0.00019446315789473686, + "logits/chosen": 6.329052925109863, + "logits/rejected": 6.329052925109863, + "logps/chosen": -2186.439453125, + "logps/rejected": -2186.439453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -215.61807250976562, + "rewards/margins": 0.0, + "rewards/rejected": -215.61807250976562, + "step": 271 + }, + { + "epoch": 2.863157894736842, + "grad_norm": 0.00016976258484646678, + "learning_rate": 0.0001944421052631579, + "logits/chosen": 6.608790397644043, + "logits/rejected": 6.608790397644043, + "logps/chosen": -3241.544921875, + "logps/rejected": -3241.544921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -321.216064453125, + "rewards/margins": 0.0, + "rewards/rejected": -321.216064453125, + "step": 272 + }, + { + "epoch": 2.873684210526316, + "grad_norm": 3.563039354048669e-05, + "learning_rate": 0.00019442105263157896, + "logits/chosen": 7.397287845611572, + "logits/rejected": 7.397287845611572, + "logps/chosen": -1778.8427734375, + "logps/rejected": -1778.8427734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -175.1027069091797, + "rewards/margins": 0.0, + "rewards/rejected": -175.1027069091797, + "step": 273 + }, + { + "epoch": 2.8842105263157896, + "grad_norm": 6.053608012734912e-05, + "learning_rate": 0.0001944, + "logits/chosen": 8.16748046875, + "logits/rejected": 8.16748046875, + "logps/chosen": -2778.11279296875, + "logps/rejected": -2778.11279296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -274.9839782714844, + "rewards/margins": 0.0, + "rewards/rejected": -274.9839782714844, + "step": 274 + }, + { + "epoch": 2.8947368421052633, + "grad_norm": 0.00012767533189617097, + "learning_rate": 0.00019437894736842106, + "logits/chosen": 8.444000244140625, + "logits/rejected": 8.444000244140625, + "logps/chosen": -3656.23828125, + "logps/rejected": -3656.23828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -362.6853942871094, + "rewards/margins": 0.0, + "rewards/rejected": -362.6853942871094, + "step": 275 + }, + { + "epoch": 2.905263157894737, + "grad_norm": 0.00044787710066884756, + "learning_rate": 0.0001943578947368421, + "logits/chosen": 8.340536117553711, + "logits/rejected": 8.340536117553711, + "logps/chosen": -3042.4384765625, + "logps/rejected": -3042.4384765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -301.4465026855469, + "rewards/margins": 0.0, + "rewards/rejected": -301.4465026855469, + "step": 276 + }, + { + "epoch": 2.9157894736842103, + "grad_norm": 6.145967199699953e-05, + "learning_rate": 0.00019433684210526318, + "logits/chosen": 9.499319076538086, + "logits/rejected": 9.499319076538086, + "logps/chosen": -3730.025390625, + "logps/rejected": -3730.025390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -370.1553649902344, + "rewards/margins": 0.0, + "rewards/rejected": -370.1553649902344, + "step": 277 + }, + { + "epoch": 2.9263157894736844, + "grad_norm": 5.096704626339488e-05, + "learning_rate": 0.00019431578947368423, + "logits/chosen": 10.031513214111328, + "logits/rejected": 10.031513214111328, + "logps/chosen": -3853.0146484375, + "logps/rejected": -3853.0146484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -382.45428466796875, + "rewards/margins": 0.0, + "rewards/rejected": -382.45428466796875, + "step": 278 + }, + { + "epoch": 2.9368421052631577, + "grad_norm": 3.408677002880722e-05, + "learning_rate": 0.00019429473684210526, + "logits/chosen": 10.48922061920166, + "logits/rejected": 10.48922061920166, + "logps/chosen": -3471.667236328125, + "logps/rejected": -3471.667236328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -344.3693542480469, + "rewards/margins": 0.0, + "rewards/rejected": -344.3693542480469, + "step": 279 + }, + { + "epoch": 2.9473684210526314, + "grad_norm": 2.416540155536495e-05, + "learning_rate": 0.00019427368421052633, + "logits/chosen": 10.883756637573242, + "logits/rejected": 10.883756637573242, + "logps/chosen": -3115.751953125, + "logps/rejected": -3115.751953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -308.7060852050781, + "rewards/margins": 0.0, + "rewards/rejected": -308.7060852050781, + "step": 280 + }, + { + "epoch": 2.957894736842105, + "grad_norm": 2.587015478638932e-05, + "learning_rate": 0.00019425263157894738, + "logits/chosen": 11.22066879272461, + "logits/rejected": 11.22066879272461, + "logps/chosen": -3352.4072265625, + "logps/rejected": -3352.4072265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -332.4134216308594, + "rewards/margins": 0.0, + "rewards/rejected": -332.4134216308594, + "step": 281 + }, + { + "epoch": 2.968421052631579, + "grad_norm": 2.9007293051108718e-05, + "learning_rate": 0.00019423157894736843, + "logits/chosen": 11.473288536071777, + "logits/rejected": 11.473288536071777, + "logps/chosen": -3636.136962890625, + "logps/rejected": -3636.136962890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -360.81634521484375, + "rewards/margins": 0.0, + "rewards/rejected": -360.81634521484375, + "step": 282 + }, + { + "epoch": 2.9789473684210526, + "grad_norm": 0.00021073163952678442, + "learning_rate": 0.00019421052631578948, + "logits/chosen": 11.509716987609863, + "logits/rejected": 11.509716987609863, + "logps/chosen": -4141.9130859375, + "logps/rejected": -4141.9130859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -411.3441467285156, + "rewards/margins": 0.0, + "rewards/rejected": -411.3441467285156, + "step": 283 + }, + { + "epoch": 2.9894736842105263, + "grad_norm": 1.5749537851661444e-05, + "learning_rate": 0.00019418947368421056, + "logits/chosen": 11.974430084228516, + "logits/rejected": 11.974430084228516, + "logps/chosen": -3471.6171875, + "logps/rejected": -3471.6171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -344.33441162109375, + "rewards/margins": 0.0, + "rewards/rejected": -344.33441162109375, + "step": 284 + }, + { + "epoch": 3.0, + "grad_norm": 1.054062886396423e-05, + "learning_rate": 0.00019416842105263158, + "logits/chosen": 12.16253662109375, + "logits/rejected": 12.16253662109375, + "logps/chosen": -3107.7216796875, + "logps/rejected": -3107.7216796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -307.7463073730469, + "rewards/margins": 0.0, + "rewards/rejected": -307.7463073730469, + "step": 285 + }, + { + "epoch": 3.0105263157894737, + "grad_norm": 1.4191447917255573e-05, + "learning_rate": 0.00019414736842105263, + "logits/chosen": 12.31031322479248, + "logits/rejected": 12.31031322479248, + "logps/chosen": -3822.673828125, + "logps/rejected": -3822.673828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -379.0824890136719, + "rewards/margins": 0.0, + "rewards/rejected": -379.0824890136719, + "step": 286 + }, + { + "epoch": 3.0210526315789474, + "grad_norm": 1.0503478733880911e-05, + "learning_rate": 0.00019412631578947368, + "logits/chosen": 12.43275260925293, + "logits/rejected": 12.43275260925293, + "logps/chosen": -3547.466796875, + "logps/rejected": -3547.466796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.91937255859375, + "rewards/margins": 0.0, + "rewards/rejected": -351.91937255859375, + "step": 287 + }, + { + "epoch": 3.031578947368421, + "grad_norm": 4.318946594139561e-06, + "learning_rate": 0.00019410526315789475, + "logits/chosen": 12.568634986877441, + "logits/rejected": 12.568634986877441, + "logps/chosen": -2387.291015625, + "logps/rejected": -2387.291015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -235.947509765625, + "rewards/margins": 0.0, + "rewards/rejected": -235.947509765625, + "step": 288 + }, + { + "epoch": 3.042105263157895, + "grad_norm": 1.4017550711287186e-05, + "learning_rate": 0.0001940842105263158, + "logits/chosen": 12.663244247436523, + "logits/rejected": 12.663244247436523, + "logps/chosen": -4386.4853515625, + "logps/rejected": -4386.4853515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -435.8013610839844, + "rewards/margins": 0.0, + "rewards/rejected": -435.8013610839844, + "step": 289 + }, + { + "epoch": 3.0526315789473686, + "grad_norm": 4.828956207347801e-06, + "learning_rate": 0.00019406315789473685, + "logits/chosen": 12.748896598815918, + "logits/rejected": 12.748896598815918, + "logps/chosen": -2679.640625, + "logps/rejected": -2679.640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -265.2378845214844, + "rewards/margins": 0.0, + "rewards/rejected": -265.2378845214844, + "step": 290 + }, + { + "epoch": 3.0631578947368423, + "grad_norm": 6.494905392173678e-06, + "learning_rate": 0.0001940421052631579, + "logits/chosen": 12.834653854370117, + "logits/rejected": 12.834653854370117, + "logps/chosen": -3218.525390625, + "logps/rejected": -3218.525390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -318.82666015625, + "rewards/margins": 0.0, + "rewards/rejected": -318.82666015625, + "step": 291 + }, + { + "epoch": 3.0736842105263156, + "grad_norm": 7.902185643615667e-06, + "learning_rate": 0.00019402105263157895, + "logits/chosen": 12.906092643737793, + "logits/rejected": 12.906092643737793, + "logps/chosen": -3637.69140625, + "logps/rejected": -3637.69140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -360.94183349609375, + "rewards/margins": 0.0, + "rewards/rejected": -360.94183349609375, + "step": 292 + }, + { + "epoch": 3.0842105263157893, + "grad_norm": 5.871730991202639e-06, + "learning_rate": 0.000194, + "logits/chosen": 12.97829818725586, + "logits/rejected": 12.97829818725586, + "logps/chosen": -3247.337890625, + "logps/rejected": -3247.337890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -321.7079162597656, + "rewards/margins": 0.0, + "rewards/rejected": -321.7079162597656, + "step": 293 + }, + { + "epoch": 3.094736842105263, + "grad_norm": 1.3583415238827001e-05, + "learning_rate": 0.00019397894736842105, + "logits/chosen": 13.062776565551758, + "logits/rejected": 13.062776565551758, + "logps/chosen": -4755.3134765625, + "logps/rejected": -4755.3134765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -472.5929260253906, + "rewards/margins": 0.0, + "rewards/rejected": -472.5929260253906, + "step": 294 + }, + { + "epoch": 3.1052631578947367, + "grad_norm": 1.0447386557643767e-05, + "learning_rate": 0.00019395789473684212, + "logits/chosen": 13.109066009521484, + "logits/rejected": 13.109066009521484, + "logps/chosen": -4507.43017578125, + "logps/rejected": -4507.43017578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -447.8958435058594, + "rewards/margins": 0.0, + "rewards/rejected": -447.8958435058594, + "step": 295 + }, + { + "epoch": 3.1157894736842104, + "grad_norm": 4.985206032870337e-06, + "learning_rate": 0.00019393684210526317, + "logits/chosen": 13.158016204833984, + "logits/rejected": 13.158016204833984, + "logps/chosen": -3289.4375, + "logps/rejected": -3289.4375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -325.9178771972656, + "rewards/margins": 0.0, + "rewards/rejected": -325.9178771972656, + "step": 296 + }, + { + "epoch": 3.126315789473684, + "grad_norm": 6.14567807133426e-06, + "learning_rate": 0.00019391578947368422, + "logits/chosen": 13.205784797668457, + "logits/rejected": 13.205784797668457, + "logps/chosen": -3716.341796875, + "logps/rejected": -3716.341796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -368.8068542480469, + "rewards/margins": 0.0, + "rewards/rejected": -368.8068542480469, + "step": 297 + }, + { + "epoch": 3.136842105263158, + "grad_norm": 8.479146345052868e-06, + "learning_rate": 0.00019389473684210527, + "logits/chosen": 13.259580612182617, + "logits/rejected": 13.259580612182617, + "logps/chosen": -4563.83251953125, + "logps/rejected": -4563.83251953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -453.5361022949219, + "rewards/margins": 0.0, + "rewards/rejected": -453.5361022949219, + "step": 298 + }, + { + "epoch": 3.1473684210526316, + "grad_norm": 2.743224968071445e-06, + "learning_rate": 0.00019387368421052632, + "logits/chosen": 13.301115989685059, + "logits/rejected": 13.301115989685059, + "logps/chosen": -2503.916015625, + "logps/rejected": -2503.916015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -247.61001586914062, + "rewards/margins": 0.0, + "rewards/rejected": -247.61001586914062, + "step": 299 + }, + { + "epoch": 3.1578947368421053, + "grad_norm": 7.419741450576112e-06, + "learning_rate": 0.00019385263157894737, + "logits/chosen": 13.328307151794434, + "logits/rejected": 13.328307151794434, + "logps/chosen": -4598.9130859375, + "logps/rejected": -4598.9130859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -457.0441589355469, + "rewards/margins": 0.0, + "rewards/rejected": -457.0441589355469, + "step": 300 + }, + { + "epoch": 3.1578947368421053, + "eval_logits/chosen": 13.36358642578125, + "eval_logits/rejected": 13.36358642578125, + "eval_logps/chosen": -4073.944580078125, + "eval_logps/rejected": -4073.944580078125, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -404.49127197265625, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -404.49127197265625, + "eval_runtime": 4.4168, + "eval_samples_per_second": 2.264, + "eval_steps_per_second": 2.264, + "step": 300 + }, + { + "epoch": 3.168421052631579, + "grad_norm": 8.578429515182506e-06, + "learning_rate": 0.00019383157894736842, + "logits/chosen": 13.375102996826172, + "logits/rejected": 13.375102996826172, + "logps/chosen": -4891.9267578125, + "logps/rejected": -4891.9267578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -486.2542419433594, + "rewards/margins": 0.0, + "rewards/rejected": -486.2542419433594, + "step": 301 + }, + { + "epoch": 3.1789473684210527, + "grad_norm": 5.591416538663907e-06, + "learning_rate": 0.0001938105263157895, + "logits/chosen": 13.381508827209473, + "logits/rejected": 13.381508827209473, + "logps/chosen": -4063.48828125, + "logps/rejected": -4063.48828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -403.5514831542969, + "rewards/margins": 0.0, + "rewards/rejected": -403.5514831542969, + "step": 302 + }, + { + "epoch": 3.1894736842105265, + "grad_norm": 3.211605644537485e-06, + "learning_rate": 0.00019378947368421055, + "logits/chosen": 13.397092819213867, + "logits/rejected": 13.397092819213867, + "logps/chosen": -3377.318359375, + "logps/rejected": -3377.318359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -334.7059631347656, + "rewards/margins": 0.0, + "rewards/rejected": -334.7059631347656, + "step": 303 + }, + { + "epoch": 3.2, + "grad_norm": 2.1825333078595577e-06, + "learning_rate": 0.0001937684210526316, + "logits/chosen": 13.43088150024414, + "logits/rejected": 13.43088150024414, + "logps/chosen": -2549.171875, + "logps/rejected": -2549.171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -252.13560485839844, + "rewards/margins": 0.0, + "rewards/rejected": -252.13560485839844, + "step": 304 + }, + { + "epoch": 3.2105263157894735, + "grad_norm": 2.0641391529352404e-06, + "learning_rate": 0.00019374736842105264, + "logits/chosen": 13.444385528564453, + "logits/rejected": 13.444385528564453, + "logps/chosen": -2556.91796875, + "logps/rejected": -2556.91796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -252.9102020263672, + "rewards/margins": 0.0, + "rewards/rejected": -252.9102020263672, + "step": 305 + }, + { + "epoch": 3.221052631578947, + "grad_norm": 3.567095973266987e-06, + "learning_rate": 0.0001937263157894737, + "logits/chosen": 13.43985366821289, + "logits/rejected": 13.43985366821289, + "logps/chosen": -3626.75, + "logps/rejected": -3626.75, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -359.8058776855469, + "rewards/margins": 0.0, + "rewards/rejected": -359.8058776855469, + "step": 306 + }, + { + "epoch": 3.231578947368421, + "grad_norm": 5.465305093821371e-06, + "learning_rate": 0.00019370526315789474, + "logits/chosen": 13.44680118560791, + "logits/rejected": 13.44680118560791, + "logps/chosen": -4702.0712890625, + "logps/rejected": -4702.0712890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -467.3599548339844, + "rewards/margins": 0.0, + "rewards/rejected": -467.3599548339844, + "step": 307 + }, + { + "epoch": 3.2421052631578946, + "grad_norm": 3.405055622351938e-06, + "learning_rate": 0.0001936842105263158, + "logits/chosen": 13.455514907836914, + "logits/rejected": 13.455514907836914, + "logps/chosen": -3645.736328125, + "logps/rejected": -3645.736328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -361.70452880859375, + "rewards/margins": 0.0, + "rewards/rejected": -361.70452880859375, + "step": 308 + }, + { + "epoch": 3.2526315789473683, + "grad_norm": 3.167355544064776e-06, + "learning_rate": 0.00019366315789473687, + "logits/chosen": 13.459683418273926, + "logits/rejected": 13.459683418273926, + "logps/chosen": -3632.97265625, + "logps/rejected": -3632.97265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -360.37896728515625, + "rewards/margins": 0.0, + "rewards/rejected": -360.37896728515625, + "step": 309 + }, + { + "epoch": 3.263157894736842, + "grad_norm": 2.1577347979473416e-06, + "learning_rate": 0.00019364210526315792, + "logits/chosen": 13.475505828857422, + "logits/rejected": 13.475505828857422, + "logps/chosen": -2878.51953125, + "logps/rejected": -2878.51953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -285.1257629394531, + "rewards/margins": 0.0, + "rewards/rejected": -285.1257629394531, + "step": 310 + }, + { + "epoch": 3.2736842105263158, + "grad_norm": 2.2971255475567887e-06, + "learning_rate": 0.00019362105263157894, + "logits/chosen": 13.465211868286133, + "logits/rejected": 13.465211868286133, + "logps/chosen": -3447.306640625, + "logps/rejected": -3447.306640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -341.7048034667969, + "rewards/margins": 0.0, + "rewards/rejected": -341.7048034667969, + "step": 311 + }, + { + "epoch": 3.2842105263157895, + "grad_norm": 4.984345196135109e-06, + "learning_rate": 0.00019360000000000002, + "logits/chosen": 13.494173049926758, + "logits/rejected": 13.494173049926758, + "logps/chosen": -5041.0517578125, + "logps/rejected": -5041.0517578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -501.166748046875, + "rewards/margins": 0.0, + "rewards/rejected": -501.166748046875, + "step": 312 + }, + { + "epoch": 3.294736842105263, + "grad_norm": 2.226493052148726e-06, + "learning_rate": 0.00019357894736842107, + "logits/chosen": 13.483041763305664, + "logits/rejected": 13.483041763305664, + "logps/chosen": -2896.169921875, + "logps/rejected": -2896.169921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -286.89080810546875, + "rewards/margins": 0.0, + "rewards/rejected": -286.89080810546875, + "step": 313 + }, + { + "epoch": 3.305263157894737, + "grad_norm": 3.9725127862766385e-06, + "learning_rate": 0.00019355789473684212, + "logits/chosen": 13.47500991821289, + "logits/rejected": 13.47500991821289, + "logps/chosen": -4773.4384765625, + "logps/rejected": -4773.4384765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -474.4966735839844, + "rewards/margins": 0.0, + "rewards/rejected": -474.4966735839844, + "step": 314 + }, + { + "epoch": 3.3157894736842106, + "grad_norm": 4.561420610116329e-06, + "learning_rate": 0.00019353684210526316, + "logits/chosen": 13.497587203979492, + "logits/rejected": 13.497587203979492, + "logps/chosen": -5068.68701171875, + "logps/rejected": -5068.68701171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -503.9302673339844, + "rewards/margins": 0.0, + "rewards/rejected": -503.9302673339844, + "step": 315 + }, + { + "epoch": 3.3263157894736843, + "grad_norm": 2.6150439680350246e-06, + "learning_rate": 0.00019351578947368424, + "logits/chosen": 13.476734161376953, + "logits/rejected": 13.476734161376953, + "logps/chosen": -3706.12109375, + "logps/rejected": -3706.12109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -367.7430114746094, + "rewards/margins": 0.0, + "rewards/rejected": -367.7430114746094, + "step": 316 + }, + { + "epoch": 3.336842105263158, + "grad_norm": 2.6457962576387217e-06, + "learning_rate": 0.00019349473684210526, + "logits/chosen": 13.47456169128418, + "logits/rejected": 13.47456169128418, + "logps/chosen": -3712.359375, + "logps/rejected": -3712.359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -368.3668212890625, + "rewards/margins": 0.0, + "rewards/rejected": -368.3668212890625, + "step": 317 + }, + { + "epoch": 3.3473684210526318, + "grad_norm": 2.2700191948388238e-06, + "learning_rate": 0.0001934736842105263, + "logits/chosen": 13.46279525756836, + "logits/rejected": 13.46279525756836, + "logps/chosen": -3487.662109375, + "logps/rejected": -3487.662109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -345.7403259277344, + "rewards/margins": 0.0, + "rewards/rejected": -345.7403259277344, + "step": 318 + }, + { + "epoch": 3.3578947368421055, + "grad_norm": 4.183463261142606e-06, + "learning_rate": 0.00019345263157894736, + "logits/chosen": 13.485363006591797, + "logits/rejected": 13.485363006591797, + "logps/chosen": -5099.19970703125, + "logps/rejected": -5099.19970703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -506.9815368652344, + "rewards/margins": 0.0, + "rewards/rejected": -506.9815368652344, + "step": 319 + }, + { + "epoch": 3.3684210526315788, + "grad_norm": 2.0982406567782164e-06, + "learning_rate": 0.00019343157894736844, + "logits/chosen": 13.450308799743652, + "logits/rejected": 13.450308799743652, + "logps/chosen": -3496.373046875, + "logps/rejected": -3496.373046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -346.6114196777344, + "rewards/margins": 0.0, + "rewards/rejected": -346.6114196777344, + "step": 320 + }, + { + "epoch": 3.3789473684210525, + "grad_norm": 2.1947737423033686e-06, + "learning_rate": 0.0001934105263157895, + "logits/chosen": 13.457178115844727, + "logits/rejected": 13.457178115844727, + "logps/chosen": -2930.548828125, + "logps/rejected": -2930.548828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -290.3287048339844, + "rewards/margins": 0.0, + "rewards/rejected": -290.3287048339844, + "step": 321 + }, + { + "epoch": 3.389473684210526, + "grad_norm": 1.7130514606833458e-06, + "learning_rate": 0.00019338947368421054, + "logits/chosen": 13.455402374267578, + "logits/rejected": 13.455402374267578, + "logps/chosen": -2640.44140625, + "logps/rejected": -2640.44140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -261.2625427246094, + "rewards/margins": 0.0, + "rewards/rejected": -261.2625427246094, + "step": 322 + }, + { + "epoch": 3.4, + "grad_norm": 1.4684018196930992e-06, + "learning_rate": 0.00019336842105263159, + "logits/chosen": 13.417861938476562, + "logits/rejected": 13.417861938476562, + "logps/chosen": -3507.39453125, + "logps/rejected": -3507.39453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -347.7135925292969, + "rewards/margins": 0.0, + "rewards/rejected": -347.7135925292969, + "step": 323 + }, + { + "epoch": 3.4105263157894736, + "grad_norm": 3.666943712232751e-06, + "learning_rate": 0.00019334736842105263, + "logits/chosen": 13.4088134765625, + "logits/rejected": 13.4088134765625, + "logps/chosen": -4832.970703125, + "logps/rejected": -4832.970703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -480.4499206542969, + "rewards/margins": 0.0, + "rewards/rejected": -480.4499206542969, + "step": 324 + }, + { + "epoch": 3.4210526315789473, + "grad_norm": 1.8650094943950535e-06, + "learning_rate": 0.00019332631578947368, + "logits/chosen": 13.40343189239502, + "logits/rejected": 13.40343189239502, + "logps/chosen": -3721.9345703125, + "logps/rejected": -3721.9345703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -369.275146484375, + "rewards/margins": 0.0, + "rewards/rejected": -369.275146484375, + "step": 325 + }, + { + "epoch": 3.431578947368421, + "grad_norm": 1.3328573231774499e-06, + "learning_rate": 0.00019330526315789473, + "logits/chosen": 13.392024040222168, + "logits/rejected": 13.392024040222168, + "logps/chosen": -3515.6240234375, + "logps/rejected": -3515.6240234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -348.5365295410156, + "rewards/margins": 0.0, + "rewards/rejected": -348.5365295410156, + "step": 326 + }, + { + "epoch": 3.442105263157895, + "grad_norm": 1.1790776852649287e-06, + "learning_rate": 0.0001932842105263158, + "logits/chosen": 13.412732124328613, + "logits/rejected": 13.412732124328613, + "logps/chosen": -2651.748046875, + "logps/rejected": -2651.748046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -262.3932189941406, + "rewards/margins": 0.0, + "rewards/rejected": -262.3932189941406, + "step": 327 + }, + { + "epoch": 3.4526315789473685, + "grad_norm": 2.2755937152396655e-06, + "learning_rate": 0.00019326315789473686, + "logits/chosen": 13.37868595123291, + "logits/rejected": 13.37868595123291, + "logps/chosen": -3965.984375, + "logps/rejected": -3965.984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -393.7711181640625, + "rewards/margins": 0.0, + "rewards/rejected": -393.7711181640625, + "step": 328 + }, + { + "epoch": 3.463157894736842, + "grad_norm": 2.9809966690663714e-06, + "learning_rate": 0.0001932421052631579, + "logits/chosen": 13.375978469848633, + "logits/rejected": 13.375978469848633, + "logps/chosen": -4848.76123046875, + "logps/rejected": -4848.76123046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -482.0289611816406, + "rewards/margins": 0.0, + "rewards/rejected": -482.0289611816406, + "step": 329 + }, + { + "epoch": 3.473684210526316, + "grad_norm": 1.7792560811358271e-06, + "learning_rate": 0.00019322105263157896, + "logits/chosen": 13.367987632751465, + "logits/rejected": 13.367987632751465, + "logps/chosen": -3970.8125, + "logps/rejected": -3970.8125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -394.2539367675781, + "rewards/margins": 0.0, + "rewards/rejected": -394.2539367675781, + "step": 330 + }, + { + "epoch": 3.4842105263157896, + "grad_norm": 1.8501725662645185e-06, + "learning_rate": 0.0001932, + "logits/chosen": 13.373222351074219, + "logits/rejected": 13.373222351074219, + "logps/chosen": -4302.0166015625, + "logps/rejected": -4302.0166015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -427.0168151855469, + "rewards/margins": 0.0, + "rewards/rejected": -427.0168151855469, + "step": 331 + }, + { + "epoch": 3.4947368421052634, + "grad_norm": 1.8526296798881958e-06, + "learning_rate": 0.00019317894736842106, + "logits/chosen": 13.355216979980469, + "logits/rejected": 13.355216979980469, + "logps/chosen": -3974.8515625, + "logps/rejected": -3974.8515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -394.6578369140625, + "rewards/margins": 0.0, + "rewards/rejected": -394.6578369140625, + "step": 332 + }, + { + "epoch": 3.5052631578947366, + "grad_norm": 2.7081296138931066e-06, + "learning_rate": 0.0001931578947368421, + "logits/chosen": 13.350250244140625, + "logits/rejected": 13.350250244140625, + "logps/chosen": -3739.2255859375, + "logps/rejected": -3739.2255859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -371.0042419433594, + "rewards/margins": 0.0, + "rewards/rejected": -371.0042419433594, + "step": 333 + }, + { + "epoch": 3.515789473684211, + "grad_norm": 2.4733512873353902e-06, + "learning_rate": 0.00019313684210526318, + "logits/chosen": 13.35256290435791, + "logits/rejected": 13.35256290435791, + "logps/chosen": -2659.8994140625, + "logps/rejected": -2659.8994140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -263.2083435058594, + "rewards/margins": 0.0, + "rewards/rejected": -263.2083435058594, + "step": 334 + }, + { + "epoch": 3.526315789473684, + "grad_norm": 1.920208887895569e-06, + "learning_rate": 0.00019311578947368423, + "logits/chosen": 13.312738418579102, + "logits/rejected": 13.312738418579102, + "logps/chosen": -3980.271484375, + "logps/rejected": -3980.271484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -395.1998291015625, + "rewards/margins": 0.0, + "rewards/rejected": -395.1998291015625, + "step": 335 + }, + { + "epoch": 3.536842105263158, + "grad_norm": 2.4315995688084513e-06, + "learning_rate": 0.00019309473684210525, + "logits/chosen": 13.320900917053223, + "logits/rejected": 13.320900917053223, + "logps/chosen": -5155.7353515625, + "logps/rejected": -5155.7353515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -512.6351318359375, + "rewards/margins": 0.0, + "rewards/rejected": -512.6351318359375, + "step": 336 + }, + { + "epoch": 3.5473684210526315, + "grad_norm": 1.4706346291859518e-06, + "learning_rate": 0.00019307368421052633, + "logits/chosen": 13.275252342224121, + "logits/rejected": 13.275252342224121, + "logps/chosen": -3532.2998046875, + "logps/rejected": -3532.2998046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.2041015625, + "rewards/margins": 0.0, + "rewards/rejected": -350.2041015625, + "step": 337 + }, + { + "epoch": 3.557894736842105, + "grad_norm": 2.0902027699776227e-06, + "learning_rate": 0.00019305263157894738, + "logits/chosen": 13.288823127746582, + "logits/rejected": 13.288823127746582, + "logps/chosen": -5158.9541015625, + "logps/rejected": -5158.9541015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -512.9569702148438, + "rewards/margins": 0.0, + "rewards/rejected": -512.9569702148438, + "step": 338 + }, + { + "epoch": 3.568421052631579, + "grad_norm": 1.6481401416967856e-06, + "learning_rate": 0.00019303157894736843, + "logits/chosen": 13.26176929473877, + "logits/rejected": 13.26176929473877, + "logps/chosen": -4313.7587890625, + "logps/rejected": -4313.7587890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -428.1910095214844, + "rewards/margins": 0.0, + "rewards/rejected": -428.1910095214844, + "step": 339 + }, + { + "epoch": 3.5789473684210527, + "grad_norm": 2.2164347228681436e-06, + "learning_rate": 0.00019301052631578948, + "logits/chosen": 13.27258586883545, + "logits/rejected": 13.27258586883545, + "logps/chosen": -5161.4375, + "logps/rejected": -5161.4375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -513.205322265625, + "rewards/margins": 0.0, + "rewards/rejected": -513.205322265625, + "step": 340 + }, + { + "epoch": 3.5894736842105264, + "grad_norm": 1.5355477671619155e-06, + "learning_rate": 0.00019298947368421055, + "logits/chosen": 13.252595901489258, + "logits/rejected": 13.252595901489258, + "logps/chosen": -4276.59765625, + "logps/rejected": -4276.59765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -424.8623962402344, + "rewards/margins": 0.0, + "rewards/rejected": -424.8623962402344, + "step": 341 + }, + { + "epoch": 3.6, + "grad_norm": 1.5529604979747091e-06, + "learning_rate": 0.0001929684210526316, + "logits/chosen": 13.25332260131836, + "logits/rejected": 13.25332260131836, + "logps/chosen": -4317.2294921875, + "logps/rejected": -4317.2294921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -428.5380859375, + "rewards/margins": 0.0, + "rewards/rejected": -428.5380859375, + "step": 342 + }, + { + "epoch": 3.610526315789474, + "grad_norm": 1.4351383015309693e-06, + "learning_rate": 0.00019294736842105263, + "logits/chosen": 13.258505821228027, + "logits/rejected": 13.258505821228027, + "logps/chosen": -2961.806640625, + "logps/rejected": -2961.806640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.4544677734375, + "rewards/margins": 0.0, + "rewards/rejected": -293.4544677734375, + "step": 343 + }, + { + "epoch": 3.6210526315789475, + "grad_norm": 1.7823775806391495e-06, + "learning_rate": 0.0001929263157894737, + "logits/chosen": 13.270627975463867, + "logits/rejected": 13.270627975463867, + "logps/chosen": -5166.19921875, + "logps/rejected": -5166.19921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -513.6815185546875, + "rewards/margins": 0.0, + "rewards/rejected": -513.6815185546875, + "step": 344 + }, + { + "epoch": 3.6315789473684212, + "grad_norm": 1.610401000107231e-06, + "learning_rate": 0.00019290526315789475, + "logits/chosen": 13.24936580657959, + "logits/rejected": 13.24936580657959, + "logps/chosen": -4870.50390625, + "logps/rejected": -4870.50390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.2032165527344, + "rewards/margins": 0.0, + "rewards/rejected": -484.2032165527344, + "step": 345 + }, + { + "epoch": 3.6421052631578945, + "grad_norm": 1.3039498298894614e-06, + "learning_rate": 0.0001928842105263158, + "logits/chosen": 13.25102710723877, + "logits/rejected": 13.25102710723877, + "logps/chosen": -3538.1865234375, + "logps/rejected": -3538.1865234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.79278564453125, + "rewards/margins": 0.0, + "rewards/rejected": -350.79278564453125, + "step": 346 + }, + { + "epoch": 3.6526315789473687, + "grad_norm": 1.6418638324466883e-06, + "learning_rate": 0.00019286315789473685, + "logits/chosen": 13.280070304870605, + "logits/rejected": 13.280070304870605, + "logps/chosen": -2668.4765625, + "logps/rejected": -2668.4765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.0660705566406, + "rewards/margins": 0.0, + "rewards/rejected": -264.0660705566406, + "step": 347 + }, + { + "epoch": 3.663157894736842, + "grad_norm": 1.4463740853898344e-06, + "learning_rate": 0.00019284210526315793, + "logits/chosen": 13.26858139038086, + "logits/rejected": 13.26858139038086, + "logps/chosen": -3771.80859375, + "logps/rejected": -3771.80859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.3117370605469, + "rewards/margins": 0.0, + "rewards/rejected": -374.3117370605469, + "step": 348 + }, + { + "epoch": 3.6736842105263157, + "grad_norm": 1.3835291383657022e-06, + "learning_rate": 0.00019282105263157895, + "logits/chosen": 13.270120620727539, + "logits/rejected": 13.270120620727539, + "logps/chosen": -3772.134765625, + "logps/rejected": -3772.134765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.3443603515625, + "rewards/margins": 0.0, + "rewards/rejected": -374.3443603515625, + "step": 349 + }, + { + "epoch": 3.6842105263157894, + "grad_norm": 1.1906093959623831e-06, + "learning_rate": 0.0001928, + "logits/chosen": 13.262545585632324, + "logits/rejected": 13.262545585632324, + "logps/chosen": -3540.462890625, + "logps/rejected": -3540.462890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.0204162597656, + "rewards/margins": 0.0, + "rewards/rejected": -351.0204162597656, + "step": 350 + }, + { + "epoch": 3.6842105263157894, + "eval_logits/chosen": 13.283880233764648, + "eval_logits/rejected": 13.283880233764648, + "eval_logps/chosen": -4306.6044921875, + "eval_logps/rejected": -4306.6044921875, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -427.75726318359375, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -427.75726318359375, + "eval_runtime": 4.5064, + "eval_samples_per_second": 2.219, + "eval_steps_per_second": 2.219, + "step": 350 + }, + { + "epoch": 3.694736842105263, + "grad_norm": 1.1118454494862817e-06, + "learning_rate": 0.00019277894736842105, + "logits/chosen": 13.269845008850098, + "logits/rejected": 13.269845008850098, + "logps/chosen": -3753.693359375, + "logps/rejected": -3753.693359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.4510192871094, + "rewards/margins": 0.0, + "rewards/rejected": -372.4510192871094, + "step": 351 + }, + { + "epoch": 3.705263157894737, + "grad_norm": 1.3817088984069414e-06, + "learning_rate": 0.00019275789473684212, + "logits/chosen": 13.27553939819336, + "logits/rejected": 13.27553939819336, + "logps/chosen": -4283.69775390625, + "logps/rejected": -4283.69775390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.5724182128906, + "rewards/margins": 0.0, + "rewards/rejected": -425.5724182128906, + "step": 352 + }, + { + "epoch": 3.7157894736842105, + "grad_norm": 1.2998527836316498e-06, + "learning_rate": 0.00019273684210526317, + "logits/chosen": 13.281391143798828, + "logits/rejected": 13.281391143798828, + "logps/chosen": -2965.2236328125, + "logps/rejected": -2965.2236328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.7961730957031, + "rewards/margins": 0.0, + "rewards/rejected": -293.7961730957031, + "step": 353 + }, + { + "epoch": 3.7263157894736842, + "grad_norm": 1.3068009820926818e-06, + "learning_rate": 0.00019271578947368422, + "logits/chosen": 13.27377986907959, + "logits/rejected": 13.27377986907959, + "logps/chosen": -4284.29052734375, + "logps/rejected": -4284.29052734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.6316833496094, + "rewards/margins": 0.0, + "rewards/rejected": -425.6316833496094, + "step": 354 + }, + { + "epoch": 3.736842105263158, + "grad_norm": 2.0515692540357122e-06, + "learning_rate": 0.00019269473684210527, + "logits/chosen": 13.265348434448242, + "logits/rejected": 13.265348434448242, + "logps/chosen": -4873.818359375, + "logps/rejected": -4873.818359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.53466796875, + "rewards/margins": 0.0, + "rewards/rejected": -484.53466796875, + "step": 355 + }, + { + "epoch": 3.7473684210526317, + "grad_norm": 9.791186812435626e-07, + "learning_rate": 0.00019267368421052632, + "logits/chosen": 13.267481803894043, + "logits/rejected": 13.267481803894043, + "logps/chosen": -3755.564453125, + "logps/rejected": -3755.564453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.6381530761719, + "rewards/margins": 0.0, + "rewards/rejected": -372.6381530761719, + "step": 356 + }, + { + "epoch": 3.7578947368421054, + "grad_norm": 1.958782149813487e-06, + "learning_rate": 0.00019265263157894737, + "logits/chosen": 13.266080856323242, + "logits/rejected": 13.266080856323242, + "logps/chosen": -4874.7158203125, + "logps/rejected": -4874.7158203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.6244201660156, + "rewards/margins": 0.0, + "rewards/rejected": -484.6244201660156, + "step": 357 + }, + { + "epoch": 3.768421052631579, + "grad_norm": 1.228083760906884e-06, + "learning_rate": 0.00019263157894736842, + "logits/chosen": 13.273797988891602, + "logits/rejected": 13.273797988891602, + "logps/chosen": -3775.4091796875, + "logps/rejected": -3775.4091796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.67181396484375, + "rewards/margins": 0.0, + "rewards/rejected": -374.67181396484375, + "step": 358 + }, + { + "epoch": 3.7789473684210524, + "grad_norm": 1.2550809742606361e-06, + "learning_rate": 0.0001926105263157895, + "logits/chosen": 13.265068054199219, + "logits/rejected": 13.265068054199219, + "logps/chosen": -3993.08203125, + "logps/rejected": -3993.08203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.48089599609375, + "rewards/margins": 0.0, + "rewards/rejected": -396.48089599609375, + "step": 359 + }, + { + "epoch": 3.7894736842105265, + "grad_norm": 1.412403435097076e-06, + "learning_rate": 0.00019258947368421054, + "logits/chosen": 13.28840446472168, + "logits/rejected": 13.28840446472168, + "logps/chosen": -2671.6513671875, + "logps/rejected": -2671.6513671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.383544921875, + "rewards/margins": 0.0, + "rewards/rejected": -264.383544921875, + "step": 360 + }, + { + "epoch": 3.8, + "grad_norm": 1.3179900406612433e-06, + "learning_rate": 0.0001925684210526316, + "logits/chosen": 13.26526165008545, + "logits/rejected": 13.26526165008545, + "logps/chosen": -4876.15625, + "logps/rejected": -4876.15625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.7684631347656, + "rewards/margins": 0.0, + "rewards/rejected": -484.7684631347656, + "step": 361 + }, + { + "epoch": 3.8105263157894735, + "grad_norm": 1.2032616041324218e-06, + "learning_rate": 0.00019254736842105264, + "logits/chosen": 13.256503105163574, + "logits/rejected": 13.256503105163574, + "logps/chosen": -3993.77734375, + "logps/rejected": -3993.77734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.5504150390625, + "rewards/margins": 0.0, + "rewards/rejected": -396.5504150390625, + "step": 362 + }, + { + "epoch": 3.8210526315789473, + "grad_norm": 1.1747747521440033e-06, + "learning_rate": 0.0001925263157894737, + "logits/chosen": 13.263219833374023, + "logits/rejected": 13.263219833374023, + "logps/chosen": -4325.8203125, + "logps/rejected": -4325.8203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.3971862792969, + "rewards/margins": 0.0, + "rewards/rejected": -429.3971862792969, + "step": 363 + }, + { + "epoch": 3.831578947368421, + "grad_norm": 1.5644076256648987e-06, + "learning_rate": 0.00019250526315789474, + "logits/chosen": 13.276131629943848, + "logits/rejected": 13.276131629943848, + "logps/chosen": -5173.10595703125, + "logps/rejected": -5173.10595703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3721923828125, + "rewards/margins": 0.0, + "rewards/rejected": -514.3721923828125, + "step": 364 + }, + { + "epoch": 3.8421052631578947, + "grad_norm": 1.4643563872596133e-06, + "learning_rate": 0.0001924842105263158, + "logits/chosen": 13.245011329650879, + "logits/rejected": 13.245011329650879, + "logps/chosen": -3541.8720703125, + "logps/rejected": -3541.8720703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1613464355469, + "rewards/margins": 0.0, + "rewards/rejected": -351.1613464355469, + "step": 365 + }, + { + "epoch": 3.8526315789473684, + "grad_norm": 1.7050610949809197e-06, + "learning_rate": 0.00019246315789473687, + "logits/chosen": 13.251697540283203, + "logits/rejected": 13.251697540283203, + "logps/chosen": -4288.4130859375, + "logps/rejected": -4288.4130859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0439453125, + "rewards/margins": 0.0, + "rewards/rejected": -426.0439453125, + "step": 366 + }, + { + "epoch": 3.863157894736842, + "grad_norm": 1.2089234360246337e-06, + "learning_rate": 0.00019244210526315792, + "logits/chosen": 13.23674488067627, + "logits/rejected": 13.23674488067627, + "logps/chosen": -3995.416015625, + "logps/rejected": -3995.416015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.71429443359375, + "rewards/margins": 0.0, + "rewards/rejected": -396.71429443359375, + "step": 367 + }, + { + "epoch": 3.873684210526316, + "grad_norm": 1.1628878837655066e-06, + "learning_rate": 0.00019242105263157894, + "logits/chosen": 13.228681564331055, + "logits/rejected": 13.228681564331055, + "logps/chosen": -3995.482421875, + "logps/rejected": -3995.482421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7209167480469, + "rewards/margins": 0.0, + "rewards/rejected": -396.7209167480469, + "step": 368 + }, + { + "epoch": 3.8842105263157896, + "grad_norm": 1.2132433084843797e-06, + "learning_rate": 0.00019240000000000001, + "logits/chosen": 13.220160484313965, + "logits/rejected": 13.220160484313965, + "logps/chosen": -3541.779296875, + "logps/rejected": -3541.779296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1520690917969, + "rewards/margins": 0.0, + "rewards/rejected": -351.1520690917969, + "step": 369 + }, + { + "epoch": 3.8947368421052633, + "grad_norm": 1.105313685911824e-06, + "learning_rate": 0.00019237894736842106, + "logits/chosen": 13.209223747253418, + "logits/rejected": 13.209223747253418, + "logps/chosen": -3995.791015625, + "logps/rejected": -3995.791015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7518005371094, + "rewards/margins": 0.0, + "rewards/rejected": -396.7518005371094, + "step": 370 + }, + { + "epoch": 3.905263157894737, + "grad_norm": 1.171314693237946e-06, + "learning_rate": 0.0001923578947368421, + "logits/chosen": 13.208657264709473, + "logits/rejected": 13.208657264709473, + "logps/chosen": -4289.1083984375, + "logps/rejected": -4289.1083984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.1134948730469, + "rewards/margins": 0.0, + "rewards/rejected": -426.1134948730469, + "step": 371 + }, + { + "epoch": 3.9157894736842103, + "grad_norm": 1.3756163070866023e-06, + "learning_rate": 0.00019233684210526316, + "logits/chosen": 13.194493293762207, + "logits/rejected": 13.194493293762207, + "logps/chosen": -4877.82177734375, + "logps/rejected": -4877.82177734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9350280761719, + "rewards/margins": 0.0, + "rewards/rejected": -484.9350280761719, + "step": 372 + }, + { + "epoch": 3.9263157894736844, + "grad_norm": 1.2630104038180434e-06, + "learning_rate": 0.00019231578947368424, + "logits/chosen": 13.204888343811035, + "logits/rejected": 13.204888343811035, + "logps/chosen": -2672.6591796875, + "logps/rejected": -2672.6591796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4843444824219, + "rewards/margins": 0.0, + "rewards/rejected": -264.4843444824219, + "step": 373 + }, + { + "epoch": 3.9368421052631577, + "grad_norm": 1.0803781833601533e-06, + "learning_rate": 0.0001922947368421053, + "logits/chosen": 13.172213554382324, + "logits/rejected": 13.172213554382324, + "logps/chosen": -3997.701171875, + "logps/rejected": -3997.701171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.94281005859375, + "rewards/margins": 0.0, + "rewards/rejected": -396.94281005859375, + "step": 374 + }, + { + "epoch": 3.9473684210526314, + "grad_norm": 1.0733593853728962e-06, + "learning_rate": 0.0001922736842105263, + "logits/chosen": 13.162105560302734, + "logits/rejected": 13.162105560302734, + "logps/chosen": -3997.857421875, + "logps/rejected": -3997.857421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.95843505859375, + "rewards/margins": 0.0, + "rewards/rejected": -396.95843505859375, + "step": 375 + }, + { + "epoch": 3.957894736842105, + "grad_norm": 9.741016810949077e-07, + "learning_rate": 0.0001922526315789474, + "logits/chosen": 13.156563758850098, + "logits/rejected": 13.156563758850098, + "logps/chosen": -3757.431640625, + "logps/rejected": -3757.431640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8248596191406, + "rewards/margins": 0.0, + "rewards/rejected": -372.8248596191406, + "step": 376 + }, + { + "epoch": 3.968421052631579, + "grad_norm": 1.3226389228293556e-06, + "learning_rate": 0.00019223157894736844, + "logits/chosen": 13.146832466125488, + "logits/rejected": 13.146832466125488, + "logps/chosen": -4877.81298828125, + "logps/rejected": -4877.81298828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.93414306640625, + "rewards/margins": 0.0, + "rewards/rejected": -484.93414306640625, + "step": 377 + }, + { + "epoch": 3.9789473684210526, + "grad_norm": 1.6002762777134194e-06, + "learning_rate": 0.00019221052631578949, + "logits/chosen": 13.164546966552734, + "logits/rejected": 13.164546966552734, + "logps/chosen": -5172.0595703125, + "logps/rejected": -5172.0595703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.2675170898438, + "rewards/margins": 0.0, + "rewards/rejected": -514.2675170898438, + "step": 378 + }, + { + "epoch": 3.9894736842105263, + "grad_norm": 1.0010132882598555e-06, + "learning_rate": 0.00019218947368421053, + "logits/chosen": 13.131548881530762, + "logits/rejected": 13.131548881530762, + "logps/chosen": -3999.15234375, + "logps/rejected": -3999.15234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0879211425781, + "rewards/margins": 0.0, + "rewards/rejected": -397.0879211425781, + "step": 379 + }, + { + "epoch": 4.0, + "grad_norm": 1.7763335335985175e-06, + "learning_rate": 0.0001921684210526316, + "logits/chosen": 13.159083366394043, + "logits/rejected": 13.159083366394043, + "logps/chosen": -5171.75732421875, + "logps/rejected": -5171.75732421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.2373046875, + "rewards/margins": 0.0, + "rewards/rejected": -514.2373046875, + "step": 380 + }, + { + "epoch": 4.010526315789473, + "grad_norm": 1.333224417976453e-06, + "learning_rate": 0.00019214736842105263, + "logits/chosen": 13.134832382202148, + "logits/rejected": 13.134832382202148, + "logps/chosen": -4878.49365234375, + "logps/rejected": -4878.49365234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.002197265625, + "rewards/margins": 0.0, + "rewards/rejected": -485.002197265625, + "step": 381 + }, + { + "epoch": 4.021052631578947, + "grad_norm": 1.2603699133251212e-06, + "learning_rate": 0.00019212631578947368, + "logits/chosen": 13.1292724609375, + "logits/rejected": 13.1292724609375, + "logps/chosen": -3999.875, + "logps/rejected": -3999.875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1601867675781, + "rewards/margins": 0.0, + "rewards/rejected": -397.1601867675781, + "step": 382 + }, + { + "epoch": 4.031578947368421, + "grad_norm": 2.4716996449569706e-06, + "learning_rate": 0.00019210526315789473, + "logits/chosen": 13.159635543823242, + "logits/rejected": 13.159635543823242, + "logps/chosen": -5172.27783203125, + "logps/rejected": -5172.27783203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.2893676757812, + "rewards/margins": 0.0, + "rewards/rejected": -514.2893676757812, + "step": 383 + }, + { + "epoch": 4.042105263157895, + "grad_norm": 1.8309251572645735e-06, + "learning_rate": 0.0001920842105263158, + "logits/chosen": 13.128382682800293, + "logits/rejected": 13.128382682800293, + "logps/chosen": -3540.001953125, + "logps/rejected": -3540.001953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.9743347167969, + "rewards/margins": 0.0, + "rewards/rejected": -350.9743347167969, + "step": 384 + }, + { + "epoch": 4.052631578947368, + "grad_norm": 1.743502366480243e-06, + "learning_rate": 0.00019206315789473686, + "logits/chosen": 13.158990859985352, + "logits/rejected": 13.158990859985352, + "logps/chosen": -5173.2236328125, + "logps/rejected": -5173.2236328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3839721679688, + "rewards/margins": 0.0, + "rewards/rejected": -514.3839721679688, + "step": 385 + }, + { + "epoch": 4.063157894736842, + "grad_norm": 1.4177874163578963e-06, + "learning_rate": 0.0001920421052631579, + "logits/chosen": 13.139883995056152, + "logits/rejected": 13.139883995056152, + "logps/chosen": -4288.00390625, + "logps/rejected": -4288.00390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0030212402344, + "rewards/margins": 0.0, + "rewards/rejected": -426.0030212402344, + "step": 386 + }, + { + "epoch": 4.073684210526316, + "grad_norm": 1.272582608180528e-06, + "learning_rate": 0.00019202105263157896, + "logits/chosen": 13.142391204833984, + "logits/rejected": 13.142391204833984, + "logps/chosen": -4288.1630859375, + "logps/rejected": -4288.1630859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0189514160156, + "rewards/margins": 0.0, + "rewards/rejected": -426.0189514160156, + "step": 387 + }, + { + "epoch": 4.08421052631579, + "grad_norm": 1.4503170859825332e-06, + "learning_rate": 0.000192, + "logits/chosen": 13.14102840423584, + "logits/rejected": 13.14102840423584, + "logps/chosen": -4879.34765625, + "logps/rejected": -4879.34765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0876159667969, + "rewards/margins": 0.0, + "rewards/rejected": -485.0876159667969, + "step": 388 + }, + { + "epoch": 4.094736842105263, + "grad_norm": 1.0536674608374597e-06, + "learning_rate": 0.00019197894736842105, + "logits/chosen": 13.139214515686035, + "logits/rejected": 13.139214515686035, + "logps/chosen": -3999.828125, + "logps/rejected": -3999.828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1554870605469, + "rewards/margins": 0.0, + "rewards/rejected": -397.1554870605469, + "step": 389 + }, + { + "epoch": 4.105263157894737, + "grad_norm": 1.036173443935695e-06, + "learning_rate": 0.0001919578947368421, + "logits/chosen": 13.14235782623291, + "logits/rejected": 13.14235782623291, + "logps/chosen": -3999.95703125, + "logps/rejected": -3999.95703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.16839599609375, + "rewards/margins": 0.0, + "rewards/rejected": -397.16839599609375, + "step": 390 + }, + { + "epoch": 4.11578947368421, + "grad_norm": 1.262857836081821e-06, + "learning_rate": 0.00019193684210526318, + "logits/chosen": 13.149985313415527, + "logits/rejected": 13.149985313415527, + "logps/chosen": -4879.71923828125, + "logps/rejected": -4879.71923828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.124755859375, + "rewards/margins": 0.0, + "rewards/rejected": -485.124755859375, + "step": 391 + }, + { + "epoch": 4.126315789473685, + "grad_norm": 1.1243696462770458e-06, + "learning_rate": 0.00019191578947368423, + "logits/chosen": 13.153663635253906, + "logits/rejected": 13.153663635253906, + "logps/chosen": -4879.74462890625, + "logps/rejected": -4879.74462890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1272888183594, + "rewards/margins": 0.0, + "rewards/rejected": -485.1272888183594, + "step": 392 + }, + { + "epoch": 4.136842105263158, + "grad_norm": 1.6817705272842431e-06, + "learning_rate": 0.00019189473684210528, + "logits/chosen": 13.16176986694336, + "logits/rejected": 13.16176986694336, + "logps/chosen": -4288.1015625, + "logps/rejected": -4288.1015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0127868652344, + "rewards/margins": 0.0, + "rewards/rejected": -426.0127868652344, + "step": 393 + }, + { + "epoch": 4.147368421052631, + "grad_norm": 1.2323616829235107e-06, + "learning_rate": 0.00019187368421052633, + "logits/chosen": 13.162755966186523, + "logits/rejected": 13.162755966186523, + "logps/chosen": -4880.0029296875, + "logps/rejected": -4880.0029296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.15313720703125, + "rewards/margins": 0.0, + "rewards/rejected": -485.15313720703125, + "step": 394 + }, + { + "epoch": 4.157894736842105, + "grad_norm": 1.7906470475281822e-06, + "learning_rate": 0.00019185263157894738, + "logits/chosen": 13.166156768798828, + "logits/rejected": 13.166156768798828, + "logps/chosen": -3756.595703125, + "logps/rejected": -3756.595703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.74127197265625, + "rewards/margins": 0.0, + "rewards/rejected": -372.74127197265625, + "step": 395 + }, + { + "epoch": 4.168421052631579, + "grad_norm": 1.3215309309089207e-06, + "learning_rate": 0.00019183157894736843, + "logits/chosen": 13.161626815795898, + "logits/rejected": 13.161626815795898, + "logps/chosen": -3999.72265625, + "logps/rejected": -3999.72265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.14495849609375, + "rewards/margins": 0.0, + "rewards/rejected": -397.14495849609375, + "step": 396 + }, + { + "epoch": 4.178947368421053, + "grad_norm": 1.270137659048487e-06, + "learning_rate": 0.00019181052631578948, + "logits/chosen": 13.1672945022583, + "logits/rejected": 13.1672945022583, + "logps/chosen": -4880.52880859375, + "logps/rejected": -4880.52880859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2057189941406, + "rewards/margins": 0.0, + "rewards/rejected": -485.2057189941406, + "step": 397 + }, + { + "epoch": 4.189473684210526, + "grad_norm": 2.2139588509162422e-06, + "learning_rate": 0.00019178947368421055, + "logits/chosen": 13.173166275024414, + "logits/rejected": 13.173166275024414, + "logps/chosen": -2964.701171875, + "logps/rejected": -2964.701171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.7439270019531, + "rewards/margins": 0.0, + "rewards/rejected": -293.7439270019531, + "step": 398 + }, + { + "epoch": 4.2, + "grad_norm": 1.464019192098931e-06, + "learning_rate": 0.0001917684210526316, + "logits/chosen": 13.16378402709961, + "logits/rejected": 13.16378402709961, + "logps/chosen": -3774.458984375, + "logps/rejected": -3774.458984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.5767822265625, + "rewards/margins": 0.0, + "rewards/rejected": -374.5767822265625, + "step": 399 + }, + { + "epoch": 4.2105263157894735, + "grad_norm": 1.2820212305086898e-06, + "learning_rate": 0.00019174736842105262, + "logits/chosen": 13.157520294189453, + "logits/rejected": 13.157520294189453, + "logps/chosen": -4881.1240234375, + "logps/rejected": -4881.1240234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2652282714844, + "rewards/margins": 0.0, + "rewards/rejected": -485.2652282714844, + "step": 400 + }, + { + "epoch": 4.2105263157894735, + "eval_logits/chosen": 13.169970512390137, + "eval_logits/rejected": 13.169970512390137, + "eval_logps/chosen": -4308.177734375, + "eval_logps/rejected": -4308.177734375, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -427.91461181640625, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -427.91461181640625, + "eval_runtime": 4.6141, + "eval_samples_per_second": 2.167, + "eval_steps_per_second": 2.167, + "step": 400 + }, + { + "epoch": 4.221052631578948, + "grad_norm": 1.2124367003707448e-06, + "learning_rate": 0.0001917263157894737, + "logits/chosen": 13.173371315002441, + "logits/rejected": 13.173371315002441, + "logps/chosen": -2669.56640625, + "logps/rejected": -2669.56640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.175048828125, + "rewards/margins": 0.0, + "rewards/rejected": -264.175048828125, + "step": 401 + }, + { + "epoch": 4.231578947368421, + "grad_norm": 2.1893995381105924e-06, + "learning_rate": 0.00019170526315789475, + "logits/chosen": 13.178827285766602, + "logits/rejected": 13.178827285766602, + "logps/chosen": -5173.55859375, + "logps/rejected": -5173.55859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4174194335938, + "rewards/margins": 0.0, + "rewards/rejected": -514.4174194335938, + "step": 402 + }, + { + "epoch": 4.242105263157895, + "grad_norm": 1.2583254829223733e-06, + "learning_rate": 0.0001916842105263158, + "logits/chosen": 13.161089897155762, + "logits/rejected": 13.161089897155762, + "logps/chosen": -4288.7822265625, + "logps/rejected": -4288.7822265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.08087158203125, + "rewards/margins": 0.0, + "rewards/rejected": -426.08087158203125, + "step": 403 + }, + { + "epoch": 4.252631578947368, + "grad_norm": 1.2309641306273988e-06, + "learning_rate": 0.00019166315789473685, + "logits/chosen": 13.155193328857422, + "logits/rejected": 13.155193328857422, + "logps/chosen": -3999.55859375, + "logps/rejected": -3999.55859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1285400390625, + "rewards/margins": 0.0, + "rewards/rejected": -397.1285400390625, + "step": 404 + }, + { + "epoch": 4.2631578947368425, + "grad_norm": 1.2718624020635616e-06, + "learning_rate": 0.00019164210526315792, + "logits/chosen": 13.167367935180664, + "logits/rejected": 13.167367935180664, + "logps/chosen": -3775.71875, + "logps/rejected": -3775.71875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7027587890625, + "rewards/margins": 0.0, + "rewards/rejected": -374.7027587890625, + "step": 405 + }, + { + "epoch": 4.273684210526316, + "grad_norm": 1.0539905588302645e-06, + "learning_rate": 0.00019162105263157895, + "logits/chosen": 13.168222427368164, + "logits/rejected": 13.168222427368164, + "logps/chosen": -4881.8994140625, + "logps/rejected": -4881.8994140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.3427734375, + "rewards/margins": 0.0, + "rewards/rejected": -485.3427734375, + "step": 406 + }, + { + "epoch": 4.284210526315789, + "grad_norm": 1.0759672477433924e-06, + "learning_rate": 0.0001916, + "logits/chosen": 13.163827896118164, + "logits/rejected": 13.163827896118164, + "logps/chosen": -3999.337890625, + "logps/rejected": -3999.337890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1064758300781, + "rewards/margins": 0.0, + "rewards/rejected": -397.1064758300781, + "step": 407 + }, + { + "epoch": 4.294736842105263, + "grad_norm": 1.7650302197580459e-06, + "learning_rate": 0.00019157894736842104, + "logits/chosen": 13.197267532348633, + "logits/rejected": 13.197267532348633, + "logps/chosen": -5173.19287109375, + "logps/rejected": -5173.19287109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.380859375, + "rewards/margins": 0.0, + "rewards/rejected": -514.380859375, + "step": 408 + }, + { + "epoch": 4.3052631578947365, + "grad_norm": 2.2256742795434548e-06, + "learning_rate": 0.00019155789473684212, + "logits/chosen": 13.2003812789917, + "logits/rejected": 13.2003812789917, + "logps/chosen": -5173.109375, + "logps/rejected": -5173.109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3724975585938, + "rewards/margins": 0.0, + "rewards/rejected": -514.3724975585938, + "step": 409 + }, + { + "epoch": 4.315789473684211, + "grad_norm": 2.0965801468264544e-06, + "learning_rate": 0.00019153684210526317, + "logits/chosen": 13.176103591918945, + "logits/rejected": 13.176103591918945, + "logps/chosen": -3756.27734375, + "logps/rejected": -3756.27734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.7094421386719, + "rewards/margins": 0.0, + "rewards/rejected": -372.7094421386719, + "step": 410 + }, + { + "epoch": 4.326315789473684, + "grad_norm": 1.487669351263321e-06, + "learning_rate": 0.00019151578947368422, + "logits/chosen": 13.169191360473633, + "logits/rejected": 13.169191360473633, + "logps/chosen": -3998.92578125, + "logps/rejected": -3998.92578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0652770996094, + "rewards/margins": 0.0, + "rewards/rejected": -397.0652770996094, + "step": 411 + }, + { + "epoch": 4.336842105263158, + "grad_norm": 1.2760125400745892e-06, + "learning_rate": 0.0001914947368421053, + "logits/chosen": 13.163147926330566, + "logits/rejected": 13.163147926330566, + "logps/chosen": -3998.853515625, + "logps/rejected": -3998.853515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.05804443359375, + "rewards/margins": 0.0, + "rewards/rejected": -397.05804443359375, + "step": 412 + }, + { + "epoch": 4.347368421052631, + "grad_norm": 1.5911463151496719e-06, + "learning_rate": 0.00019147368421052632, + "logits/chosen": 13.163750648498535, + "logits/rejected": 13.163750648498535, + "logps/chosen": -4288.0498046875, + "logps/rejected": -4288.0498046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.00762939453125, + "rewards/margins": 0.0, + "rewards/rejected": -426.00762939453125, + "step": 413 + }, + { + "epoch": 4.3578947368421055, + "grad_norm": 1.0120020306203514e-06, + "learning_rate": 0.00019145263157894737, + "logits/chosen": 13.14409351348877, + "logits/rejected": 13.14409351348877, + "logps/chosen": -3999.26171875, + "logps/rejected": -3999.26171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0988464355469, + "rewards/margins": 0.0, + "rewards/rejected": -397.0988464355469, + "step": 414 + }, + { + "epoch": 4.368421052631579, + "grad_norm": 1.0539432651057723e-06, + "learning_rate": 0.00019143157894736842, + "logits/chosen": 13.138327598571777, + "logits/rejected": 13.138327598571777, + "logps/chosen": -3756.953125, + "logps/rejected": -3756.953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.7770080566406, + "rewards/margins": 0.0, + "rewards/rejected": -372.7770080566406, + "step": 415 + }, + { + "epoch": 4.378947368421053, + "grad_norm": 1.5022621937532676e-06, + "learning_rate": 0.0001914105263157895, + "logits/chosen": 13.130035400390625, + "logits/rejected": 13.130035400390625, + "logps/chosen": -4881.49169921875, + "logps/rejected": -4881.49169921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.302001953125, + "rewards/margins": 0.0, + "rewards/rejected": -485.302001953125, + "step": 416 + }, + { + "epoch": 4.389473684210526, + "grad_norm": 1.2897043006887543e-06, + "learning_rate": 0.00019138947368421054, + "logits/chosen": 13.139795303344727, + "logits/rejected": 13.139795303344727, + "logps/chosen": -2670.173828125, + "logps/rejected": -2670.173828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.2358093261719, + "rewards/margins": 0.0, + "rewards/rejected": -264.2358093261719, + "step": 417 + }, + { + "epoch": 4.4, + "grad_norm": 1.2354365708233672e-06, + "learning_rate": 0.0001913684210526316, + "logits/chosen": 13.133476257324219, + "logits/rejected": 13.133476257324219, + "logps/chosen": -2670.3125, + "logps/rejected": -2670.3125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.2496643066406, + "rewards/margins": 0.0, + "rewards/rejected": -264.2496643066406, + "step": 418 + }, + { + "epoch": 4.410526315789474, + "grad_norm": 9.402820637660625e-07, + "learning_rate": 0.00019134736842105264, + "logits/chosen": 13.107630729675293, + "logits/rejected": 13.107630729675293, + "logps/chosen": -3756.966796875, + "logps/rejected": -3756.966796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.77838134765625, + "rewards/margins": 0.0, + "rewards/rejected": -372.77838134765625, + "step": 419 + }, + { + "epoch": 4.421052631578947, + "grad_norm": 1.1259535313001834e-06, + "learning_rate": 0.0001913263157894737, + "logits/chosen": 13.110640525817871, + "logits/rejected": 13.110640525817871, + "logps/chosen": -2965.091796875, + "logps/rejected": -2965.091796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.7829895019531, + "rewards/margins": 0.0, + "rewards/rejected": -293.7829895019531, + "step": 420 + }, + { + "epoch": 4.431578947368421, + "grad_norm": 2.4301314169861143e-06, + "learning_rate": 0.00019130526315789474, + "logits/chosen": 13.100411415100098, + "logits/rejected": 13.100411415100098, + "logps/chosen": -4320.720703125, + "logps/rejected": -4320.720703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -428.88720703125, + "rewards/margins": 0.0, + "rewards/rejected": -428.88720703125, + "step": 421 + }, + { + "epoch": 4.442105263157894, + "grad_norm": 2.2063109099690337e-06, + "learning_rate": 0.0001912842105263158, + "logits/chosen": 13.100083351135254, + "logits/rejected": 13.100083351135254, + "logps/chosen": -4321.380859375, + "logps/rejected": -4321.380859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -428.9532165527344, + "rewards/margins": 0.0, + "rewards/rejected": -428.9532165527344, + "step": 422 + }, + { + "epoch": 4.4526315789473685, + "grad_norm": 1.871135395958845e-06, + "learning_rate": 0.00019126315789473686, + "logits/chosen": 13.125321388244629, + "logits/rejected": 13.125321388244629, + "logps/chosen": -5172.970703125, + "logps/rejected": -5172.970703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.358642578125, + "rewards/margins": 0.0, + "rewards/rejected": -514.358642578125, + "step": 423 + }, + { + "epoch": 4.463157894736842, + "grad_norm": 1.4180277503328398e-06, + "learning_rate": 0.00019124210526315791, + "logits/chosen": 13.104350090026855, + "logits/rejected": 13.104350090026855, + "logps/chosen": -3538.36328125, + "logps/rejected": -3538.36328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.8104553222656, + "rewards/margins": 0.0, + "rewards/rejected": -350.8104553222656, + "step": 424 + }, + { + "epoch": 4.473684210526316, + "grad_norm": 1.996656692426768e-06, + "learning_rate": 0.00019122105263157896, + "logits/chosen": 13.137791633605957, + "logits/rejected": 13.137791633605957, + "logps/chosen": -2671.154296875, + "logps/rejected": -2671.154296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.3338317871094, + "rewards/margins": 0.0, + "rewards/rejected": -264.3338317871094, + "step": 425 + }, + { + "epoch": 4.484210526315789, + "grad_norm": 1.4522890978696523e-06, + "learning_rate": 0.0001912, + "logits/chosen": 13.131717681884766, + "logits/rejected": 13.131717681884766, + "logps/chosen": -4880.13671875, + "logps/rejected": -4880.13671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.16650390625, + "rewards/margins": 0.0, + "rewards/rejected": -485.16650390625, + "step": 426 + }, + { + "epoch": 4.494736842105263, + "grad_norm": 1.9031556348636514e-06, + "learning_rate": 0.00019117894736842106, + "logits/chosen": 13.130173683166504, + "logits/rejected": 13.130173683166504, + "logps/chosen": -3539.03125, + "logps/rejected": -3539.03125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.87725830078125, + "rewards/margins": 0.0, + "rewards/rejected": -350.87725830078125, + "step": 427 + }, + { + "epoch": 4.505263157894737, + "grad_norm": 2.2110975805844646e-06, + "learning_rate": 0.0001911578947368421, + "logits/chosen": 13.150248527526855, + "logits/rejected": 13.150248527526855, + "logps/chosen": -2966.357421875, + "logps/rejected": -2966.357421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9095458984375, + "rewards/margins": 0.0, + "rewards/rejected": -293.9095458984375, + "step": 428 + }, + { + "epoch": 4.515789473684211, + "grad_norm": 1.2973713410247e-06, + "learning_rate": 0.00019113684210526316, + "logits/chosen": 13.148184776306152, + "logits/rejected": 13.148184776306152, + "logps/chosen": -3776.3828125, + "logps/rejected": -3776.3828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7691650390625, + "rewards/margins": 0.0, + "rewards/rejected": -374.7691650390625, + "step": 429 + }, + { + "epoch": 4.526315789473684, + "grad_norm": 9.659921715865494e-07, + "learning_rate": 0.00019111578947368424, + "logits/chosen": 13.14145278930664, + "logits/rejected": 13.14145278930664, + "logps/chosen": -3540.23828125, + "logps/rejected": -3540.23828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.9979553222656, + "rewards/margins": 0.0, + "rewards/rejected": -350.9979553222656, + "step": 430 + }, + { + "epoch": 4.536842105263158, + "grad_norm": 9.392854849465948e-07, + "learning_rate": 0.00019109473684210529, + "logits/chosen": 13.14527416229248, + "logits/rejected": 13.14527416229248, + "logps/chosen": -3540.732421875, + "logps/rejected": -3540.732421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.04736328125, + "rewards/margins": 0.0, + "rewards/rejected": -351.04736328125, + "step": 431 + }, + { + "epoch": 4.5473684210526315, + "grad_norm": 1.0593016668281052e-06, + "learning_rate": 0.0001910736842105263, + "logits/chosen": 13.15092945098877, + "logits/rejected": 13.15092945098877, + "logps/chosen": -3541.126953125, + "logps/rejected": -3541.126953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.0868225097656, + "rewards/margins": 0.0, + "rewards/rejected": -351.0868225097656, + "step": 432 + }, + { + "epoch": 4.557894736842105, + "grad_norm": 1.668764070927864e-06, + "learning_rate": 0.00019105263157894738, + "logits/chosen": 13.164057731628418, + "logits/rejected": 13.164057731628418, + "logps/chosen": -3758.6455078125, + "logps/rejected": -3758.6455078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9462585449219, + "rewards/margins": 0.0, + "rewards/rejected": -372.9462585449219, + "step": 433 + }, + { + "epoch": 4.568421052631579, + "grad_norm": 7.811093496457033e-07, + "learning_rate": 0.00019103157894736843, + "logits/chosen": 13.192194938659668, + "logits/rejected": 13.192194938659668, + "logps/chosen": -2673.822265625, + "logps/rejected": -2673.822265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.60064697265625, + "rewards/margins": 0.0, + "rewards/rejected": -264.60064697265625, + "step": 434 + }, + { + "epoch": 4.578947368421053, + "grad_norm": 3.6859380543319276e-06, + "learning_rate": 0.00019101052631578948, + "logits/chosen": 13.189213752746582, + "logits/rejected": 13.189213752746582, + "logps/chosen": -4325.8125, + "logps/rejected": -4325.8125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.3963928222656, + "rewards/margins": 0.0, + "rewards/rejected": -429.3963928222656, + "step": 435 + }, + { + "epoch": 4.589473684210526, + "grad_norm": 1.823381353460718e-06, + "learning_rate": 0.00019098947368421053, + "logits/chosen": 13.189521789550781, + "logits/rejected": 13.189521789550781, + "logps/chosen": -3997.427734375, + "logps/rejected": -3997.427734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.91546630859375, + "rewards/margins": 0.0, + "rewards/rejected": -396.91546630859375, + "step": 436 + }, + { + "epoch": 4.6, + "grad_norm": 8.962330753092829e-07, + "learning_rate": 0.0001909684210526316, + "logits/chosen": 13.202988624572754, + "logits/rejected": 13.202988624572754, + "logps/chosen": -3543.3759765625, + "logps/rejected": -3543.3759765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3117370605469, + "rewards/margins": 0.0, + "rewards/rejected": -351.3117370605469, + "step": 437 + }, + { + "epoch": 4.610526315789474, + "grad_norm": 1.3767996733804466e-06, + "learning_rate": 0.00019094736842105263, + "logits/chosen": 13.221596717834473, + "logits/rejected": 13.221596717834473, + "logps/chosen": -3777.140625, + "logps/rejected": -3777.140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8449401855469, + "rewards/margins": 0.0, + "rewards/rejected": -374.8449401855469, + "step": 438 + }, + { + "epoch": 4.621052631578947, + "grad_norm": 2.3824086383683607e-06, + "learning_rate": 0.00019092631578947368, + "logits/chosen": 13.229989051818848, + "logits/rejected": 13.229989051818848, + "logps/chosen": -4288.107421875, + "logps/rejected": -4288.107421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0133972167969, + "rewards/margins": 0.0, + "rewards/rejected": -426.0133972167969, + "step": 439 + }, + { + "epoch": 4.631578947368421, + "grad_norm": 2.202197947553941e-06, + "learning_rate": 0.00019090526315789473, + "logits/chosen": 13.244230270385742, + "logits/rejected": 13.244230270385742, + "logps/chosen": -2673.4326171875, + "logps/rejected": -2673.4326171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5616760253906, + "rewards/margins": 0.0, + "rewards/rejected": -264.5616760253906, + "step": 440 + }, + { + "epoch": 4.6421052631578945, + "grad_norm": 1.6115047856146703e-06, + "learning_rate": 0.0001908842105263158, + "logits/chosen": 13.221540451049805, + "logits/rejected": 13.221540451049805, + "logps/chosen": -3544.4091796875, + "logps/rejected": -3544.4091796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4150390625, + "rewards/margins": 0.0, + "rewards/rejected": -351.4150390625, + "step": 441 + }, + { + "epoch": 4.652631578947369, + "grad_norm": 1.139371875069628e-06, + "learning_rate": 0.00019086315789473686, + "logits/chosen": 13.217147827148438, + "logits/rejected": 13.217147827148438, + "logps/chosen": -3544.6357421875, + "logps/rejected": -3544.6357421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4377136230469, + "rewards/margins": 0.0, + "rewards/rejected": -351.4377136230469, + "step": 442 + }, + { + "epoch": 4.663157894736842, + "grad_norm": 1.5991441841833876e-06, + "learning_rate": 0.0001908421052631579, + "logits/chosen": 13.220754623413086, + "logits/rejected": 13.220754623413086, + "logps/chosen": -4876.76171875, + "logps/rejected": -4876.76171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.8290100097656, + "rewards/margins": 0.0, + "rewards/rejected": -484.8290100097656, + "step": 443 + }, + { + "epoch": 4.673684210526316, + "grad_norm": 8.980619554677105e-07, + "learning_rate": 0.00019082105263157895, + "logits/chosen": 13.223969459533691, + "logits/rejected": 13.223969459533691, + "logps/chosen": -2969.2216796875, + "logps/rejected": -2969.2216796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.19598388671875, + "rewards/margins": 0.0, + "rewards/rejected": -294.19598388671875, + "step": 444 + }, + { + "epoch": 4.684210526315789, + "grad_norm": 3.341406909385114e-06, + "learning_rate": 0.0001908, + "logits/chosen": 13.24233627319336, + "logits/rejected": 13.24233627319336, + "logps/chosen": -5173.033203125, + "logps/rejected": -5173.033203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3649291992188, + "rewards/margins": 0.0, + "rewards/rejected": -514.3649291992188, + "step": 445 + }, + { + "epoch": 4.6947368421052635, + "grad_norm": 8.327181717504573e-07, + "learning_rate": 0.00019077894736842105, + "logits/chosen": 13.230820655822754, + "logits/rejected": 13.230820655822754, + "logps/chosen": -2969.14453125, + "logps/rejected": -2969.14453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1882629394531, + "rewards/margins": 0.0, + "rewards/rejected": -294.1882629394531, + "step": 446 + }, + { + "epoch": 4.705263157894737, + "grad_norm": 9.506024412075931e-07, + "learning_rate": 0.0001907578947368421, + "logits/chosen": 13.245979309082031, + "logits/rejected": 13.245979309082031, + "logps/chosen": -2674.6337890625, + "logps/rejected": -2674.6337890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6817932128906, + "rewards/margins": 0.0, + "rewards/rejected": -264.6817932128906, + "step": 447 + }, + { + "epoch": 4.715789473684211, + "grad_norm": 7.398201091746159e-07, + "learning_rate": 0.00019073684210526318, + "logits/chosen": 13.229601860046387, + "logits/rejected": 13.229601860046387, + "logps/chosen": -3545.4248046875, + "logps/rejected": -3545.4248046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5166015625, + "rewards/margins": 0.0, + "rewards/rejected": -351.5166015625, + "step": 448 + }, + { + "epoch": 4.726315789473684, + "grad_norm": 2.0775814846274443e-06, + "learning_rate": 0.00019071578947368423, + "logits/chosen": 13.263916015625, + "logits/rejected": 13.263916015625, + "logps/chosen": -5173.59130859375, + "logps/rejected": -5173.59130859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4207153320312, + "rewards/margins": 0.0, + "rewards/rejected": -514.4207153320312, + "step": 449 + }, + { + "epoch": 4.7368421052631575, + "grad_norm": 1.680143554949609e-06, + "learning_rate": 0.00019069473684210528, + "logits/chosen": 13.272430419921875, + "logits/rejected": 13.272430419921875, + "logps/chosen": -5173.77880859375, + "logps/rejected": -5173.77880859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.439453125, + "rewards/margins": 0.0, + "rewards/rejected": -514.439453125, + "step": 450 + }, + { + "epoch": 4.7368421052631575, + "eval_logits/chosen": 13.271191596984863, + "eval_logits/rejected": 13.271191596984863, + "eval_logps/chosen": -4310.1708984375, + "eval_logps/rejected": -4310.1708984375, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.11395263671875, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.11395263671875, + "eval_runtime": 4.6236, + "eval_samples_per_second": 2.163, + "eval_steps_per_second": 2.163, + "step": 450 + }, + { + "epoch": 4.747368421052632, + "grad_norm": 9.817190402827691e-07, + "learning_rate": 0.00019067368421052633, + "logits/chosen": 13.252490043640137, + "logits/rejected": 13.252490043640137, + "logps/chosen": -3546.0009765625, + "logps/rejected": -3546.0009765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.57421875, + "rewards/margins": 0.0, + "rewards/rejected": -351.57421875, + "step": 451 + }, + { + "epoch": 4.757894736842105, + "grad_norm": 1.3320783409653814e-06, + "learning_rate": 0.00019065263157894737, + "logits/chosen": 13.26960563659668, + "logits/rejected": 13.26960563659668, + "logps/chosen": -3777.662109375, + "logps/rejected": -3777.662109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8970947265625, + "rewards/margins": 0.0, + "rewards/rejected": -374.8970947265625, + "step": 452 + }, + { + "epoch": 4.768421052631579, + "grad_norm": 1.573008830746403e-06, + "learning_rate": 0.00019063157894736842, + "logits/chosen": 13.289057731628418, + "logits/rejected": 13.289057731628418, + "logps/chosen": -2674.783203125, + "logps/rejected": -2674.783203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6967468261719, + "rewards/margins": 0.0, + "rewards/rejected": -264.6967468261719, + "step": 453 + }, + { + "epoch": 4.778947368421052, + "grad_norm": 1.3278061032906407e-06, + "learning_rate": 0.00019061052631578947, + "logits/chosen": 13.281237602233887, + "logits/rejected": 13.281237602233887, + "logps/chosen": -4875.6162109375, + "logps/rejected": -4875.6162109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.7144470214844, + "rewards/margins": 0.0, + "rewards/rejected": -484.7144470214844, + "step": 454 + }, + { + "epoch": 4.7894736842105265, + "grad_norm": 1.0953923492706963e-06, + "learning_rate": 0.00019058947368421055, + "logits/chosen": 13.275703430175781, + "logits/rejected": 13.275703430175781, + "logps/chosen": -3546.6806640625, + "logps/rejected": -3546.6806640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.6421813964844, + "rewards/margins": 0.0, + "rewards/rejected": -351.6421813964844, + "step": 455 + }, + { + "epoch": 4.8, + "grad_norm": 1.286256861021684e-06, + "learning_rate": 0.0001905684210526316, + "logits/chosen": 13.285297393798828, + "logits/rejected": 13.285297393798828, + "logps/chosen": -3777.6103515625, + "logps/rejected": -3777.6103515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8919372558594, + "rewards/margins": 0.0, + "rewards/rejected": -374.8919372558594, + "step": 456 + }, + { + "epoch": 4.810526315789474, + "grad_norm": 1.295603055950778e-06, + "learning_rate": 0.00019054736842105262, + "logits/chosen": 13.311622619628906, + "logits/rejected": 13.311622619628906, + "logps/chosen": -5175.2099609375, + "logps/rejected": -5175.2099609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5825805664062, + "rewards/margins": 0.0, + "rewards/rejected": -514.5825805664062, + "step": 457 + }, + { + "epoch": 4.821052631578947, + "grad_norm": 1.3328030945558567e-06, + "learning_rate": 0.0001905263157894737, + "logits/chosen": 13.314613342285156, + "logits/rejected": 13.314613342285156, + "logps/chosen": -5175.31396484375, + "logps/rejected": -5175.31396484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5929565429688, + "rewards/margins": 0.0, + "rewards/rejected": -514.5929565429688, + "step": 458 + }, + { + "epoch": 4.831578947368421, + "grad_norm": 1.3567895393862273e-06, + "learning_rate": 0.00019050526315789475, + "logits/chosen": 13.295086860656738, + "logits/rejected": 13.295086860656738, + "logps/chosen": -4875.93017578125, + "logps/rejected": -4875.93017578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.745849609375, + "rewards/margins": 0.0, + "rewards/rejected": -484.745849609375, + "step": 459 + }, + { + "epoch": 4.842105263157895, + "grad_norm": 1.2604548373928992e-06, + "learning_rate": 0.0001904842105263158, + "logits/chosen": 13.324274063110352, + "logits/rejected": 13.324274063110352, + "logps/chosen": -5176.4375, + "logps/rejected": -5176.4375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.705322265625, + "rewards/margins": 0.0, + "rewards/rejected": -514.705322265625, + "step": 460 + }, + { + "epoch": 4.852631578947369, + "grad_norm": 1.3131174227964948e-06, + "learning_rate": 0.00019046315789473685, + "logits/chosen": 13.296370506286621, + "logits/rejected": 13.296370506286621, + "logps/chosen": -3993.275390625, + "logps/rejected": -3993.275390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.5002136230469, + "rewards/margins": 0.0, + "rewards/rejected": -396.5002136230469, + "step": 461 + }, + { + "epoch": 4.863157894736842, + "grad_norm": 1.2121271311116288e-06, + "learning_rate": 0.00019044210526315792, + "logits/chosen": 13.317803382873535, + "logits/rejected": 13.317803382873535, + "logps/chosen": -2673.90234375, + "logps/rejected": -2673.90234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.608642578125, + "rewards/margins": 0.0, + "rewards/rejected": -264.608642578125, + "step": 462 + }, + { + "epoch": 4.873684210526315, + "grad_norm": 1.172584347841621e-06, + "learning_rate": 0.00019042105263157897, + "logits/chosen": 13.31067180633545, + "logits/rejected": 13.31067180633545, + "logps/chosen": -4328.0791015625, + "logps/rejected": -4328.0791015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.623046875, + "rewards/margins": 0.0, + "rewards/rejected": -429.623046875, + "step": 463 + }, + { + "epoch": 4.88421052631579, + "grad_norm": 1.28936221699405e-06, + "learning_rate": 0.0001904, + "logits/chosen": 13.292346954345703, + "logits/rejected": 13.292346954345703, + "logps/chosen": -3993.26953125, + "logps/rejected": -3993.26953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.4996337890625, + "rewards/margins": 0.0, + "rewards/rejected": -396.4996337890625, + "step": 464 + }, + { + "epoch": 4.894736842105263, + "grad_norm": 1.2857520914622e-06, + "learning_rate": 0.00019037894736842107, + "logits/chosen": 13.285961151123047, + "logits/rejected": 13.285961151123047, + "logps/chosen": -3993.365234375, + "logps/rejected": -3993.365234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.50921630859375, + "rewards/margins": 0.0, + "rewards/rejected": -396.50921630859375, + "step": 465 + }, + { + "epoch": 4.905263157894737, + "grad_norm": 1.0654831612555427e-06, + "learning_rate": 0.00019035789473684212, + "logits/chosen": 13.28244686126709, + "logits/rejected": 13.28244686126709, + "logps/chosen": -3757.052734375, + "logps/rejected": -3757.052734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.7869567871094, + "rewards/margins": 0.0, + "rewards/rejected": -372.7869567871094, + "step": 466 + }, + { + "epoch": 4.91578947368421, + "grad_norm": 1.2411975376380724e-06, + "learning_rate": 0.00019033684210526317, + "logits/chosen": 13.274484634399414, + "logits/rejected": 13.274484634399414, + "logps/chosen": -4877.8818359375, + "logps/rejected": -4877.8818359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9410095214844, + "rewards/margins": 0.0, + "rewards/rejected": -484.9410095214844, + "step": 467 + }, + { + "epoch": 4.926315789473684, + "grad_norm": 1.0312362519471208e-06, + "learning_rate": 0.00019031578947368422, + "logits/chosen": 13.256425857543945, + "logits/rejected": 13.256425857543945, + "logps/chosen": -3545.3017578125, + "logps/rejected": -3545.3017578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5043029785156, + "rewards/margins": 0.0, + "rewards/rejected": -351.5043029785156, + "step": 468 + }, + { + "epoch": 4.936842105263158, + "grad_norm": 1.257968733625603e-06, + "learning_rate": 0.0001902947368421053, + "logits/chosen": 13.255985260009766, + "logits/rejected": 13.255985260009766, + "logps/chosen": -3777.646484375, + "logps/rejected": -3777.646484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8955383300781, + "rewards/margins": 0.0, + "rewards/rejected": -374.8955383300781, + "step": 469 + }, + { + "epoch": 4.947368421052632, + "grad_norm": 1.187660927826073e-06, + "learning_rate": 0.00019027368421052632, + "logits/chosen": 13.241244316101074, + "logits/rejected": 13.241244316101074, + "logps/chosen": -3995.07421875, + "logps/rejected": -3995.07421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.68011474609375, + "rewards/margins": 0.0, + "rewards/rejected": -396.68011474609375, + "step": 470 + }, + { + "epoch": 4.957894736842105, + "grad_norm": 1.061337911778537e-06, + "learning_rate": 0.00019025263157894737, + "logits/chosen": 13.232675552368164, + "logits/rejected": 13.232675552368164, + "logps/chosen": -3544.474609375, + "logps/rejected": -3544.474609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4216003417969, + "rewards/margins": 0.0, + "rewards/rejected": -351.4216003417969, + "step": 471 + }, + { + "epoch": 4.968421052631579, + "grad_norm": 1.2000216429441934e-06, + "learning_rate": 0.00019023157894736841, + "logits/chosen": 13.242695808410645, + "logits/rejected": 13.242695808410645, + "logps/chosen": -2673.0537109375, + "logps/rejected": -2673.0537109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5237731933594, + "rewards/margins": 0.0, + "rewards/rejected": -264.5237731933594, + "step": 472 + }, + { + "epoch": 4.978947368421053, + "grad_norm": 1.227028064931801e-06, + "learning_rate": 0.0001902105263157895, + "logits/chosen": 13.229562759399414, + "logits/rejected": 13.229562759399414, + "logps/chosen": -4326.953125, + "logps/rejected": -4326.953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.5104675292969, + "rewards/margins": 0.0, + "rewards/rejected": -429.5104675292969, + "step": 473 + }, + { + "epoch": 4.989473684210527, + "grad_norm": 1.1333920610923087e-06, + "learning_rate": 0.00019018947368421054, + "logits/chosen": 13.216691017150879, + "logits/rejected": 13.216691017150879, + "logps/chosen": -2966.9140625, + "logps/rejected": -2966.9140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9652099609375, + "rewards/margins": 0.0, + "rewards/rejected": -293.9652099609375, + "step": 474 + }, + { + "epoch": 5.0, + "grad_norm": 1.122456751545542e-06, + "learning_rate": 0.0001901684210526316, + "logits/chosen": 13.198467254638672, + "logits/rejected": 13.198467254638672, + "logps/chosen": -3997.0859375, + "logps/rejected": -3997.0859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.88128662109375, + "rewards/margins": 0.0, + "rewards/rejected": -396.88128662109375, + "step": 475 + }, + { + "epoch": 5.010526315789473, + "grad_norm": 1.1456676247689757e-06, + "learning_rate": 0.00019014736842105264, + "logits/chosen": 13.1884183883667, + "logits/rejected": 13.1884183883667, + "logps/chosen": -3997.07421875, + "logps/rejected": -3997.07421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8800964355469, + "rewards/margins": 0.0, + "rewards/rejected": -396.8800964355469, + "step": 476 + }, + { + "epoch": 5.021052631578947, + "grad_norm": 9.451536584492715e-07, + "learning_rate": 0.0001901263157894737, + "logits/chosen": 13.196056365966797, + "logits/rejected": 13.196056365966797, + "logps/chosen": -2673.443359375, + "logps/rejected": -2673.443359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.562744140625, + "rewards/margins": 0.0, + "rewards/rejected": -264.562744140625, + "step": 477 + }, + { + "epoch": 5.031578947368421, + "grad_norm": 8.419349342148053e-07, + "learning_rate": 0.00019010526315789474, + "logits/chosen": 13.166739463806152, + "logits/rejected": 13.166739463806152, + "logps/chosen": -3544.150390625, + "logps/rejected": -3544.150390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.38916015625, + "rewards/margins": 0.0, + "rewards/rejected": -351.38916015625, + "step": 478 + }, + { + "epoch": 5.042105263157895, + "grad_norm": 8.346195841113513e-07, + "learning_rate": 0.00019008421052631579, + "logits/chosen": 13.1581449508667, + "logits/rejected": 13.1581449508667, + "logps/chosen": -3544.2314453125, + "logps/rejected": -3544.2314453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.39727783203125, + "rewards/margins": 0.0, + "rewards/rejected": -351.39727783203125, + "step": 479 + }, + { + "epoch": 5.052631578947368, + "grad_norm": 1.2025379874103237e-06, + "learning_rate": 0.00019006315789473686, + "logits/chosen": 13.158472061157227, + "logits/rejected": 13.158472061157227, + "logps/chosen": -3778.328125, + "logps/rejected": -3778.328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9637145996094, + "rewards/margins": 0.0, + "rewards/rejected": -374.9637145996094, + "step": 480 + }, + { + "epoch": 5.063157894736842, + "grad_norm": 1.1906524832738796e-06, + "learning_rate": 0.0001900421052631579, + "logits/chosen": 13.154966354370117, + "logits/rejected": 13.154966354370117, + "logps/chosen": -3778.458984375, + "logps/rejected": -3778.458984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9767761230469, + "rewards/margins": 0.0, + "rewards/rejected": -374.9767761230469, + "step": 481 + }, + { + "epoch": 5.073684210526316, + "grad_norm": 8.846835157783062e-07, + "learning_rate": 0.00019002105263157896, + "logits/chosen": 13.16341495513916, + "logits/rejected": 13.16341495513916, + "logps/chosen": -2674.466796875, + "logps/rejected": -2674.466796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.66510009765625, + "rewards/margins": 0.0, + "rewards/rejected": -264.66510009765625, + "step": 482 + }, + { + "epoch": 5.08421052631579, + "grad_norm": 1.1412429330448504e-06, + "learning_rate": 0.00019, + "logits/chosen": 13.149027824401855, + "logits/rejected": 13.149027824401855, + "logps/chosen": -3779.0517578125, + "logps/rejected": -3779.0517578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.03607177734375, + "rewards/margins": 0.0, + "rewards/rejected": -375.03607177734375, + "step": 483 + }, + { + "epoch": 5.094736842105263, + "grad_norm": 1.4646574300059e-06, + "learning_rate": 0.00018997894736842106, + "logits/chosen": 13.149612426757812, + "logits/rejected": 13.149612426757812, + "logps/chosen": -4877.3212890625, + "logps/rejected": -4877.3212890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.8849792480469, + "rewards/margins": 0.0, + "rewards/rejected": -484.8849792480469, + "step": 484 + }, + { + "epoch": 5.105263157894737, + "grad_norm": 9.852100220086868e-07, + "learning_rate": 0.0001899578947368421, + "logits/chosen": 13.141609191894531, + "logits/rejected": 13.141609191894531, + "logps/chosen": -3999.216796875, + "logps/rejected": -3999.216796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0943603515625, + "rewards/margins": 0.0, + "rewards/rejected": -397.0943603515625, + "step": 485 + }, + { + "epoch": 5.11578947368421, + "grad_norm": 1.259721102542244e-06, + "learning_rate": 0.00018993684210526316, + "logits/chosen": 13.151392936706543, + "logits/rejected": 13.151392936706543, + "logps/chosen": -4877.60009765625, + "logps/rejected": -4877.60009765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.912841796875, + "rewards/margins": 0.0, + "rewards/rejected": -484.912841796875, + "step": 486 + }, + { + "epoch": 5.126315789473685, + "grad_norm": 1.444942881789757e-06, + "learning_rate": 0.00018991578947368423, + "logits/chosen": 13.153428077697754, + "logits/rejected": 13.153428077697754, + "logps/chosen": -4877.8466796875, + "logps/rejected": -4877.8466796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9375, + "rewards/margins": 0.0, + "rewards/rejected": -484.9375, + "step": 487 + }, + { + "epoch": 5.136842105263158, + "grad_norm": 1.651527099966188e-06, + "learning_rate": 0.00018989473684210528, + "logits/chosen": 13.148782730102539, + "logits/rejected": 13.148782730102539, + "logps/chosen": -3543.2548828125, + "logps/rejected": -3543.2548828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.29962158203125, + "rewards/margins": 0.0, + "rewards/rejected": -351.29962158203125, + "step": 488 + }, + { + "epoch": 5.147368421052631, + "grad_norm": 2.338605099794222e-06, + "learning_rate": 0.0001898736842105263, + "logits/chosen": 13.183222770690918, + "logits/rejected": 13.183222770690918, + "logps/chosen": -5172.541015625, + "logps/rejected": -5172.541015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.315673828125, + "rewards/margins": 0.0, + "rewards/rejected": -514.315673828125, + "step": 489 + }, + { + "epoch": 5.157894736842105, + "grad_norm": 1.1970715831921552e-06, + "learning_rate": 0.00018985263157894738, + "logits/chosen": 13.153637886047363, + "logits/rejected": 13.153637886047363, + "logps/chosen": -3999.62109375, + "logps/rejected": -3999.62109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1347961425781, + "rewards/margins": 0.0, + "rewards/rejected": -397.1347961425781, + "step": 490 + }, + { + "epoch": 5.168421052631579, + "grad_norm": 1.2377339544400456e-06, + "learning_rate": 0.00018983157894736843, + "logits/chosen": 13.158660888671875, + "logits/rejected": 13.158660888671875, + "logps/chosen": -3756.373046875, + "logps/rejected": -3756.373046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.718994140625, + "rewards/margins": 0.0, + "rewards/rejected": -372.718994140625, + "step": 491 + }, + { + "epoch": 5.178947368421053, + "grad_norm": 1.1482413810881553e-06, + "learning_rate": 0.00018981052631578948, + "logits/chosen": 13.168063163757324, + "logits/rejected": 13.168063163757324, + "logps/chosen": -2673.7548828125, + "logps/rejected": -2673.7548828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5939025878906, + "rewards/margins": 0.0, + "rewards/rejected": -264.5939025878906, + "step": 492 + }, + { + "epoch": 5.189473684210526, + "grad_norm": 1.0602715292407083e-06, + "learning_rate": 0.00018978947368421053, + "logits/chosen": 13.145636558532715, + "logits/rejected": 13.145636558532715, + "logps/chosen": -3999.296875, + "logps/rejected": -3999.296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1023864746094, + "rewards/margins": 0.0, + "rewards/rejected": -397.1023864746094, + "step": 493 + }, + { + "epoch": 5.2, + "grad_norm": 2.178872819058597e-06, + "learning_rate": 0.0001897684210526316, + "logits/chosen": 13.171972274780273, + "logits/rejected": 13.171972274780273, + "logps/chosen": -5172.416015625, + "logps/rejected": -5172.416015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3031616210938, + "rewards/margins": 0.0, + "rewards/rejected": -514.3031616210938, + "step": 494 + }, + { + "epoch": 5.2105263157894735, + "grad_norm": 1.7866142343336833e-06, + "learning_rate": 0.00018974736842105266, + "logits/chosen": 13.150436401367188, + "logits/rejected": 13.150436401367188, + "logps/chosen": -4878.61376953125, + "logps/rejected": -4878.61376953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.01422119140625, + "rewards/margins": 0.0, + "rewards/rejected": -485.01422119140625, + "step": 495 + }, + { + "epoch": 5.221052631578948, + "grad_norm": 1.0862628414542996e-06, + "learning_rate": 0.00018972631578947368, + "logits/chosen": 13.153552055358887, + "logits/rejected": 13.153552055358887, + "logps/chosen": -3780.3046875, + "logps/rejected": -3780.3046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.1613464355469, + "rewards/margins": 0.0, + "rewards/rejected": -375.1613464355469, + "step": 496 + }, + { + "epoch": 5.231578947368421, + "grad_norm": 1.1326321782689774e-06, + "learning_rate": 0.00018970526315789475, + "logits/chosen": 13.161069869995117, + "logits/rejected": 13.161069869995117, + "logps/chosen": -2966.298828125, + "logps/rejected": -2966.298828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9036865234375, + "rewards/margins": 0.0, + "rewards/rejected": -293.9036865234375, + "step": 497 + }, + { + "epoch": 5.242105263157895, + "grad_norm": 1.367556592413166e-06, + "learning_rate": 0.0001896842105263158, + "logits/chosen": 13.162830352783203, + "logits/rejected": 13.162830352783203, + "logps/chosen": -4286.22314453125, + "logps/rejected": -4286.22314453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.824951171875, + "rewards/margins": 0.0, + "rewards/rejected": -425.824951171875, + "step": 498 + }, + { + "epoch": 5.252631578947368, + "grad_norm": 1.0151137530556298e-06, + "learning_rate": 0.00018966315789473685, + "logits/chosen": 13.15966510772705, + "logits/rejected": 13.15966510772705, + "logps/chosen": -3999.76171875, + "logps/rejected": -3999.76171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.14886474609375, + "rewards/margins": 0.0, + "rewards/rejected": -397.14886474609375, + "step": 499 + }, + { + "epoch": 5.2631578947368425, + "grad_norm": 1.204264094667451e-06, + "learning_rate": 0.0001896421052631579, + "logits/chosen": 13.15974235534668, + "logits/rejected": 13.15974235534668, + "logps/chosen": -3542.126953125, + "logps/rejected": -3542.126953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.18682861328125, + "rewards/margins": 0.0, + "rewards/rejected": -351.18682861328125, + "step": 500 + }, + { + "epoch": 5.2631578947368425, + "eval_logits/chosen": 13.179182052612305, + "eval_logits/rejected": 13.179182052612305, + "eval_logps/chosen": -4309.7412109375, + "eval_logps/rejected": -4309.7412109375, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.0709533691406, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.0709533691406, + "eval_runtime": 4.3709, + "eval_samples_per_second": 2.288, + "eval_steps_per_second": 2.288, + "step": 500 + }, + { + "epoch": 5.273684210526316, + "grad_norm": 1.5734547105239471e-06, + "learning_rate": 0.00018962105263157898, + "logits/chosen": 13.19274616241455, + "logits/rejected": 13.19274616241455, + "logps/chosen": -5172.8818359375, + "logps/rejected": -5172.8818359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3497924804688, + "rewards/margins": 0.0, + "rewards/rejected": -514.3497924804688, + "step": 501 + }, + { + "epoch": 5.284210526315789, + "grad_norm": 1.6210292415053118e-06, + "learning_rate": 0.0001896, + "logits/chosen": 13.196554183959961, + "logits/rejected": 13.196554183959961, + "logps/chosen": -5172.888671875, + "logps/rejected": -5172.888671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3504638671875, + "rewards/margins": 0.0, + "rewards/rejected": -514.3504638671875, + "step": 502 + }, + { + "epoch": 5.294736842105263, + "grad_norm": 1.633610168028099e-06, + "learning_rate": 0.00018957894736842105, + "logits/chosen": 13.202737808227539, + "logits/rejected": 13.202737808227539, + "logps/chosen": -5172.873046875, + "logps/rejected": -5172.873046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.348876953125, + "rewards/margins": 0.0, + "rewards/rejected": -514.348876953125, + "step": 503 + }, + { + "epoch": 5.3052631578947365, + "grad_norm": 1.2603325103555107e-06, + "learning_rate": 0.0001895578947368421, + "logits/chosen": 13.192971229553223, + "logits/rejected": 13.192971229553223, + "logps/chosen": -4324.396484375, + "logps/rejected": -4324.396484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.2547912597656, + "rewards/margins": 0.0, + "rewards/rejected": -429.2547912597656, + "step": 504 + }, + { + "epoch": 5.315789473684211, + "grad_norm": 1.2053794762323378e-06, + "learning_rate": 0.00018953684210526318, + "logits/chosen": 13.18458080291748, + "logits/rejected": 13.18458080291748, + "logps/chosen": -3999.69921875, + "logps/rejected": -3999.69921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1426086425781, + "rewards/margins": 0.0, + "rewards/rejected": -397.1426086425781, + "step": 505 + }, + { + "epoch": 5.326315789473684, + "grad_norm": 1.2172538390586851e-06, + "learning_rate": 0.00018951578947368422, + "logits/chosen": 13.187020301818848, + "logits/rejected": 13.187020301818848, + "logps/chosen": -3999.52734375, + "logps/rejected": -3999.52734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.12542724609375, + "rewards/margins": 0.0, + "rewards/rejected": -397.12542724609375, + "step": 506 + }, + { + "epoch": 5.336842105263158, + "grad_norm": 1.5117987004487077e-06, + "learning_rate": 0.00018949473684210527, + "logits/chosen": 13.192102432250977, + "logits/rejected": 13.192102432250977, + "logps/chosen": -2966.3984375, + "logps/rejected": -2966.3984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9136657714844, + "rewards/margins": 0.0, + "rewards/rejected": -293.9136657714844, + "step": 507 + }, + { + "epoch": 5.347368421052631, + "grad_norm": 1.1993967063972377e-06, + "learning_rate": 0.00018947368421052632, + "logits/chosen": 13.196795463562012, + "logits/rejected": 13.196795463562012, + "logps/chosen": -2672.544921875, + "logps/rejected": -2672.544921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.472900390625, + "rewards/margins": 0.0, + "rewards/rejected": -264.472900390625, + "step": 508 + }, + { + "epoch": 5.3578947368421055, + "grad_norm": 9.924002597472281e-07, + "learning_rate": 0.00018945263157894737, + "logits/chosen": 13.189982414245605, + "logits/rejected": 13.189982414245605, + "logps/chosen": -2672.9765625, + "logps/rejected": -2672.9765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5160827636719, + "rewards/margins": 0.0, + "rewards/rejected": -264.5160827636719, + "step": 509 + }, + { + "epoch": 5.368421052631579, + "grad_norm": 1.0404201020719483e-06, + "learning_rate": 0.00018943157894736842, + "logits/chosen": 13.169742584228516, + "logits/rejected": 13.169742584228516, + "logps/chosen": -3757.103515625, + "logps/rejected": -3757.103515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.79205322265625, + "rewards/margins": 0.0, + "rewards/rejected": -372.79205322265625, + "step": 510 + }, + { + "epoch": 5.378947368421053, + "grad_norm": 2.8191673209221335e-06, + "learning_rate": 0.00018941052631578947, + "logits/chosen": 13.166238784790039, + "logits/rejected": 13.166238784790039, + "logps/chosen": -4878.91748046875, + "logps/rejected": -4878.91748046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0445861816406, + "rewards/margins": 0.0, + "rewards/rejected": -485.0445861816406, + "step": 511 + }, + { + "epoch": 5.389473684210526, + "grad_norm": 1.0694178627090878e-06, + "learning_rate": 0.00018938947368421055, + "logits/chosen": 13.163310050964355, + "logits/rejected": 13.163310050964355, + "logps/chosen": -3757.392578125, + "logps/rejected": -3757.392578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8209533691406, + "rewards/margins": 0.0, + "rewards/rejected": -372.8209533691406, + "step": 512 + }, + { + "epoch": 5.4, + "grad_norm": 8.438643703811977e-07, + "learning_rate": 0.0001893684210526316, + "logits/chosen": 13.16645336151123, + "logits/rejected": 13.16645336151123, + "logps/chosen": -2967.0546875, + "logps/rejected": -2967.0546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9792785644531, + "rewards/margins": 0.0, + "rewards/rejected": -293.9792785644531, + "step": 513 + }, + { + "epoch": 5.410526315789474, + "grad_norm": 1.6386172774218721e-06, + "learning_rate": 0.00018934736842105265, + "logits/chosen": 13.19331169128418, + "logits/rejected": 13.19331169128418, + "logps/chosen": -5176.0, + "logps/rejected": -5176.0, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6615600585938, + "rewards/margins": 0.0, + "rewards/rejected": -514.6615600585938, + "step": 514 + }, + { + "epoch": 5.421052631578947, + "grad_norm": 1.3054219607511186e-06, + "learning_rate": 0.0001893263157894737, + "logits/chosen": 13.199670791625977, + "logits/rejected": 13.199670791625977, + "logps/chosen": -5176.2451171875, + "logps/rejected": -5176.2451171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6860961914062, + "rewards/margins": 0.0, + "rewards/rejected": -514.6860961914062, + "step": 515 + }, + { + "epoch": 5.431578947368421, + "grad_norm": 1.3290152764966479e-06, + "learning_rate": 0.00018930526315789474, + "logits/chosen": 13.175046920776367, + "logits/rejected": 13.175046920776367, + "logps/chosen": -3542.28125, + "logps/rejected": -3542.28125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2022399902344, + "rewards/margins": 0.0, + "rewards/rejected": -351.2022399902344, + "step": 516 + }, + { + "epoch": 5.442105263157894, + "grad_norm": 1.8839148197002942e-06, + "learning_rate": 0.0001892842105263158, + "logits/chosen": 13.190823554992676, + "logits/rejected": 13.190823554992676, + "logps/chosen": -4287.044921875, + "logps/rejected": -4287.044921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9071350097656, + "rewards/margins": 0.0, + "rewards/rejected": -425.9071350097656, + "step": 517 + }, + { + "epoch": 5.4526315789473685, + "grad_norm": 1.6932900734900613e-06, + "learning_rate": 0.00018926315789473684, + "logits/chosen": 13.195592880249023, + "logits/rejected": 13.195592880249023, + "logps/chosen": -3776.611328125, + "logps/rejected": -3776.611328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7920227050781, + "rewards/margins": 0.0, + "rewards/rejected": -374.7920227050781, + "step": 518 + }, + { + "epoch": 5.463157894736842, + "grad_norm": 1.7988933223023196e-06, + "learning_rate": 0.00018924210526315792, + "logits/chosen": 13.198700904846191, + "logits/rejected": 13.198700904846191, + "logps/chosen": -4287.357421875, + "logps/rejected": -4287.357421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9383850097656, + "rewards/margins": 0.0, + "rewards/rejected": -425.9383850097656, + "step": 519 + }, + { + "epoch": 5.473684210526316, + "grad_norm": 1.1877934866788564e-06, + "learning_rate": 0.00018922105263157897, + "logits/chosen": 13.20849609375, + "logits/rejected": 13.20849609375, + "logps/chosen": -4325.990234375, + "logps/rejected": -4325.990234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4141540527344, + "rewards/margins": 0.0, + "rewards/rejected": -429.4141540527344, + "step": 520 + }, + { + "epoch": 5.484210526315789, + "grad_norm": 1.2321351050559315e-06, + "learning_rate": 0.0001892, + "logits/chosen": 13.199177742004395, + "logits/rejected": 13.199177742004395, + "logps/chosen": -2967.66796875, + "logps/rejected": -2967.66796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0406188964844, + "rewards/margins": 0.0, + "rewards/rejected": -294.0406188964844, + "step": 521 + }, + { + "epoch": 5.494736842105263, + "grad_norm": 9.345473017674522e-07, + "learning_rate": 0.00018917894736842107, + "logits/chosen": 13.190621376037598, + "logits/rejected": 13.190621376037598, + "logps/chosen": -3542.87109375, + "logps/rejected": -3542.87109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.26123046875, + "rewards/margins": 0.0, + "rewards/rejected": -351.26123046875, + "step": 522 + }, + { + "epoch": 5.505263157894737, + "grad_norm": 1.3391311313171173e-06, + "learning_rate": 0.00018915789473684212, + "logits/chosen": 13.189898490905762, + "logits/rejected": 13.189898490905762, + "logps/chosen": -3997.154296875, + "logps/rejected": -3997.154296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.88812255859375, + "rewards/margins": 0.0, + "rewards/rejected": -396.88812255859375, + "step": 523 + }, + { + "epoch": 5.515789473684211, + "grad_norm": 8.558351964893518e-07, + "learning_rate": 0.00018913684210526317, + "logits/chosen": 13.18808650970459, + "logits/rejected": 13.18808650970459, + "logps/chosen": -3543.158203125, + "logps/rejected": -3543.158203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2899475097656, + "rewards/margins": 0.0, + "rewards/rejected": -351.2899475097656, + "step": 524 + }, + { + "epoch": 5.526315789473684, + "grad_norm": 2.4220507839345373e-06, + "learning_rate": 0.00018911578947368422, + "logits/chosen": 13.198655128479004, + "logits/rejected": 13.198655128479004, + "logps/chosen": -4878.38330078125, + "logps/rejected": -4878.38330078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9911804199219, + "rewards/margins": 0.0, + "rewards/rejected": -484.9911804199219, + "step": 525 + }, + { + "epoch": 5.536842105263158, + "grad_norm": 8.740883572500024e-07, + "learning_rate": 0.0001890947368421053, + "logits/chosen": 13.210992813110352, + "logits/rejected": 13.210992813110352, + "logps/chosen": -2673.4677734375, + "logps/rejected": -2673.4677734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.565185546875, + "rewards/margins": 0.0, + "rewards/rejected": -264.565185546875, + "step": 526 + }, + { + "epoch": 5.5473684210526315, + "grad_norm": 8.148468282342947e-07, + "learning_rate": 0.00018907368421052631, + "logits/chosen": 13.205110549926758, + "logits/rejected": 13.205110549926758, + "logps/chosen": -2968.787109375, + "logps/rejected": -2968.787109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.15252685546875, + "rewards/margins": 0.0, + "rewards/rejected": -294.15252685546875, + "step": 527 + }, + { + "epoch": 5.557894736842105, + "grad_norm": 1.4878780802973779e-06, + "learning_rate": 0.00018905263157894736, + "logits/chosen": 13.235568046569824, + "logits/rejected": 13.235568046569824, + "logps/chosen": -5176.36572265625, + "logps/rejected": -5176.36572265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6981811523438, + "rewards/margins": 0.0, + "rewards/rejected": -514.6981811523438, + "step": 528 + }, + { + "epoch": 5.568421052631579, + "grad_norm": 1.1538711532921297e-06, + "learning_rate": 0.00018903157894736844, + "logits/chosen": 13.207457542419434, + "logits/rejected": 13.207457542419434, + "logps/chosen": -3996.5703125, + "logps/rejected": -3996.5703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8297119140625, + "rewards/margins": 0.0, + "rewards/rejected": -396.8297119140625, + "step": 529 + }, + { + "epoch": 5.578947368421053, + "grad_norm": 1.1659416259135469e-06, + "learning_rate": 0.0001890105263157895, + "logits/chosen": 13.22602367401123, + "logits/rejected": 13.22602367401123, + "logps/chosen": -2673.1181640625, + "logps/rejected": -2673.1181640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5302429199219, + "rewards/margins": 0.0, + "rewards/rejected": -264.5302429199219, + "step": 530 + }, + { + "epoch": 5.589473684210526, + "grad_norm": 1.1985692935922998e-06, + "learning_rate": 0.00018898947368421054, + "logits/chosen": 13.208648681640625, + "logits/rejected": 13.208648681640625, + "logps/chosen": -3996.619140625, + "logps/rejected": -3996.619140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8345947265625, + "rewards/margins": 0.0, + "rewards/rejected": -396.8345947265625, + "step": 531 + }, + { + "epoch": 5.6, + "grad_norm": 1.1034218232452986e-06, + "learning_rate": 0.0001889684210526316, + "logits/chosen": 13.220032691955566, + "logits/rejected": 13.220032691955566, + "logps/chosen": -4327.482421875, + "logps/rejected": -4327.482421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.5633850097656, + "rewards/margins": 0.0, + "rewards/rejected": -429.5633850097656, + "step": 532 + }, + { + "epoch": 5.610526315789474, + "grad_norm": 1.2875439097115304e-06, + "learning_rate": 0.00018894736842105266, + "logits/chosen": 13.214885711669922, + "logits/rejected": 13.214885711669922, + "logps/chosen": -2673.916015625, + "logps/rejected": -2673.916015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6100158691406, + "rewards/margins": 0.0, + "rewards/rejected": -264.6100158691406, + "step": 533 + }, + { + "epoch": 5.621052631578947, + "grad_norm": 1.3083537169222836e-06, + "learning_rate": 0.00018892631578947369, + "logits/chosen": 13.204473495483398, + "logits/rejected": 13.204473495483398, + "logps/chosen": -4877.75927734375, + "logps/rejected": -4877.75927734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.92877197265625, + "rewards/margins": 0.0, + "rewards/rejected": -484.92877197265625, + "step": 534 + }, + { + "epoch": 5.631578947368421, + "grad_norm": 1.1200177141290624e-06, + "learning_rate": 0.00018890526315789473, + "logits/chosen": 13.193305015563965, + "logits/rejected": 13.193305015563965, + "logps/chosen": -2969.091796875, + "logps/rejected": -2969.091796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1829833984375, + "rewards/margins": 0.0, + "rewards/rejected": -294.1829833984375, + "step": 535 + }, + { + "epoch": 5.6421052631578945, + "grad_norm": 1.3789167496724986e-06, + "learning_rate": 0.00018888421052631578, + "logits/chosen": 13.215169906616211, + "logits/rejected": 13.215169906616211, + "logps/chosen": -5176.072265625, + "logps/rejected": -5176.072265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6688232421875, + "rewards/margins": 0.0, + "rewards/rejected": -514.6688232421875, + "step": 536 + }, + { + "epoch": 5.652631578947369, + "grad_norm": 1.4613106031902134e-06, + "learning_rate": 0.00018886315789473686, + "logits/chosen": 13.212922096252441, + "logits/rejected": 13.212922096252441, + "logps/chosen": -5176.0166015625, + "logps/rejected": -5176.0166015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6632690429688, + "rewards/margins": 0.0, + "rewards/rejected": -514.6632690429688, + "step": 537 + }, + { + "epoch": 5.663157894736842, + "grad_norm": 1.2394501709422912e-06, + "learning_rate": 0.0001888421052631579, + "logits/chosen": 13.185783386230469, + "logits/rejected": 13.185783386230469, + "logps/chosen": -4289.8154296875, + "logps/rejected": -4289.8154296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.1841735839844, + "rewards/margins": 0.0, + "rewards/rejected": -426.1841735839844, + "step": 538 + }, + { + "epoch": 5.673684210526316, + "grad_norm": 8.939475151237275e-07, + "learning_rate": 0.00018882105263157896, + "logits/chosen": 13.183499336242676, + "logits/rejected": 13.183499336242676, + "logps/chosen": -3543.587890625, + "logps/rejected": -3543.587890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3329162597656, + "rewards/margins": 0.0, + "rewards/rejected": -351.3329162597656, + "step": 539 + }, + { + "epoch": 5.684210526315789, + "grad_norm": 9.325666496806662e-07, + "learning_rate": 0.0001888, + "logits/chosen": 13.191852569580078, + "logits/rejected": 13.191852569580078, + "logps/chosen": -3758.19140625, + "logps/rejected": -3758.19140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9008483886719, + "rewards/margins": 0.0, + "rewards/rejected": -372.9008483886719, + "step": 540 + }, + { + "epoch": 5.6947368421052635, + "grad_norm": 9.438648476134404e-07, + "learning_rate": 0.00018877894736842106, + "logits/chosen": 13.193848609924316, + "logits/rejected": 13.193848609924316, + "logps/chosen": -3758.2880859375, + "logps/rejected": -3758.2880859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9104919433594, + "rewards/margins": 0.0, + "rewards/rejected": -372.9104919433594, + "step": 541 + }, + { + "epoch": 5.705263157894737, + "grad_norm": 1.2441748822311638e-06, + "learning_rate": 0.0001887578947368421, + "logits/chosen": 13.200854301452637, + "logits/rejected": 13.200854301452637, + "logps/chosen": -4877.8212890625, + "logps/rejected": -4877.8212890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9349670410156, + "rewards/margins": 0.0, + "rewards/rejected": -484.9349670410156, + "step": 542 + }, + { + "epoch": 5.715789473684211, + "grad_norm": 1.0261360330332536e-06, + "learning_rate": 0.00018873684210526316, + "logits/chosen": 13.206197738647461, + "logits/rejected": 13.206197738647461, + "logps/chosen": -2674.3232421875, + "logps/rejected": -2674.3232421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6507263183594, + "rewards/margins": 0.0, + "rewards/rejected": -264.6507263183594, + "step": 543 + }, + { + "epoch": 5.726315789473684, + "grad_norm": 1.1727012179107987e-06, + "learning_rate": 0.00018871578947368423, + "logits/chosen": 13.196168899536133, + "logits/rejected": 13.196168899536133, + "logps/chosen": -4290.02783203125, + "logps/rejected": -4290.02783203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.2054138183594, + "rewards/margins": 0.0, + "rewards/rejected": -426.2054138183594, + "step": 544 + }, + { + "epoch": 5.7368421052631575, + "grad_norm": 1.2502268873504363e-06, + "learning_rate": 0.00018869473684210528, + "logits/chosen": 13.202901840209961, + "logits/rejected": 13.202901840209961, + "logps/chosen": -4877.98779296875, + "logps/rejected": -4877.98779296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9516296386719, + "rewards/margins": 0.0, + "rewards/rejected": -484.9516296386719, + "step": 545 + }, + { + "epoch": 5.747368421052632, + "grad_norm": 1.3137529322193586e-06, + "learning_rate": 0.00018867368421052633, + "logits/chosen": 13.226284980773926, + "logits/rejected": 13.226284980773926, + "logps/chosen": -5176.2158203125, + "logps/rejected": -5176.2158203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6831665039062, + "rewards/margins": 0.0, + "rewards/rejected": -514.6831665039062, + "step": 546 + }, + { + "epoch": 5.757894736842105, + "grad_norm": 1.1767713203880703e-06, + "learning_rate": 0.00018865263157894738, + "logits/chosen": 13.194772720336914, + "logits/rejected": 13.194772720336914, + "logps/chosen": -3996.72265625, + "logps/rejected": -3996.72265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8449401855469, + "rewards/margins": 0.0, + "rewards/rejected": -396.8449401855469, + "step": 547 + }, + { + "epoch": 5.768421052631579, + "grad_norm": 1.2771665751643013e-06, + "learning_rate": 0.00018863157894736843, + "logits/chosen": 13.207627296447754, + "logits/rejected": 13.207627296447754, + "logps/chosen": -4878.583984375, + "logps/rejected": -4878.583984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.01123046875, + "rewards/margins": 0.0, + "rewards/rejected": -485.01123046875, + "step": 548 + }, + { + "epoch": 5.778947368421052, + "grad_norm": 1.0961126690745004e-06, + "learning_rate": 0.00018861052631578948, + "logits/chosen": 13.201512336730957, + "logits/rejected": 13.201512336730957, + "logps/chosen": -3759.333984375, + "logps/rejected": -3759.333984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -373.0151062011719, + "rewards/margins": 0.0, + "rewards/rejected": -373.0151062011719, + "step": 549 + }, + { + "epoch": 5.7894736842105265, + "grad_norm": 1.263503122572729e-06, + "learning_rate": 0.00018858947368421053, + "logits/chosen": 13.207401275634766, + "logits/rejected": 13.207401275634766, + "logps/chosen": -4878.796875, + "logps/rejected": -4878.796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.03253173828125, + "rewards/margins": 0.0, + "rewards/rejected": -485.03253173828125, + "step": 550 + }, + { + "epoch": 5.7894736842105265, + "eval_logits/chosen": 13.214508056640625, + "eval_logits/rejected": 13.214508056640625, + "eval_logps/chosen": -4310.3515625, + "eval_logps/rejected": -4310.3515625, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.1319885253906, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.1319885253906, + "eval_runtime": 4.0248, + "eval_samples_per_second": 2.485, + "eval_steps_per_second": 2.485, + "step": 550 + }, + { + "epoch": 5.8, + "grad_norm": 1.1792767509177793e-06, + "learning_rate": 0.0001885684210526316, + "logits/chosen": 13.194851875305176, + "logits/rejected": 13.194851875305176, + "logps/chosen": -3542.806640625, + "logps/rejected": -3542.806640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2547912597656, + "rewards/margins": 0.0, + "rewards/rejected": -351.2547912597656, + "step": 551 + }, + { + "epoch": 5.810526315789474, + "grad_norm": 1.3743325553150498e-06, + "learning_rate": 0.00018854736842105265, + "logits/chosen": 13.200400352478027, + "logits/rejected": 13.200400352478027, + "logps/chosen": -3775.3046875, + "logps/rejected": -3775.3046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.6613464355469, + "rewards/margins": 0.0, + "rewards/rejected": -374.6613464355469, + "step": 552 + }, + { + "epoch": 5.821052631578947, + "grad_norm": 1.205835133077926e-06, + "learning_rate": 0.00018852631578947368, + "logits/chosen": 13.209718704223633, + "logits/rejected": 13.209718704223633, + "logps/chosen": -4879.5869140625, + "logps/rejected": -4879.5869140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1115417480469, + "rewards/margins": 0.0, + "rewards/rejected": -485.1115417480469, + "step": 553 + }, + { + "epoch": 5.831578947368421, + "grad_norm": 1.1669042123685358e-06, + "learning_rate": 0.00018850526315789475, + "logits/chosen": 13.198144912719727, + "logits/rejected": 13.198144912719727, + "logps/chosen": -3996.380859375, + "logps/rejected": -3996.380859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8107604980469, + "rewards/margins": 0.0, + "rewards/rejected": -396.8107604980469, + "step": 554 + }, + { + "epoch": 5.842105263157895, + "grad_norm": 1.0688269185266108e-06, + "learning_rate": 0.0001884842105263158, + "logits/chosen": 13.211125373840332, + "logits/rejected": 13.211125373840332, + "logps/chosen": -2672.744140625, + "logps/rejected": -2672.744140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4928283691406, + "rewards/margins": 0.0, + "rewards/rejected": -264.4928283691406, + "step": 555 + }, + { + "epoch": 5.852631578947369, + "grad_norm": 1.1227840559513425e-06, + "learning_rate": 0.00018846315789473685, + "logits/chosen": 13.205501556396484, + "logits/rejected": 13.205501556396484, + "logps/chosen": -4879.8291015625, + "logps/rejected": -4879.8291015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1357421875, + "rewards/margins": 0.0, + "rewards/rejected": -485.1357421875, + "step": 556 + }, + { + "epoch": 5.863157894736842, + "grad_norm": 1.156195594376186e-06, + "learning_rate": 0.0001884421052631579, + "logits/chosen": 13.189672470092773, + "logits/rejected": 13.189672470092773, + "logps/chosen": -3996.57421875, + "logps/rejected": -3996.57421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8301086425781, + "rewards/margins": 0.0, + "rewards/rejected": -396.8301086425781, + "step": 557 + }, + { + "epoch": 5.873684210526315, + "grad_norm": 1.4351783192978473e-06, + "learning_rate": 0.00018842105263157898, + "logits/chosen": 13.2205171585083, + "logits/rejected": 13.2205171585083, + "logps/chosen": -5175.4443359375, + "logps/rejected": -5175.4443359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6060180664062, + "rewards/margins": 0.0, + "rewards/rejected": -514.6060180664062, + "step": 558 + }, + { + "epoch": 5.88421052631579, + "grad_norm": 1.3104540812491905e-06, + "learning_rate": 0.0001884, + "logits/chosen": 13.188840866088867, + "logits/rejected": 13.188840866088867, + "logps/chosen": -3776.046875, + "logps/rejected": -3776.046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7355651855469, + "rewards/margins": 0.0, + "rewards/rejected": -374.7355651855469, + "step": 559 + }, + { + "epoch": 5.894736842105263, + "grad_norm": 1.1253225693508284e-06, + "learning_rate": 0.00018837894736842105, + "logits/chosen": 13.184172630310059, + "logits/rejected": 13.184172630310059, + "logps/chosen": -3540.91015625, + "logps/rejected": -3540.91015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.0651550292969, + "rewards/margins": 0.0, + "rewards/rejected": -351.0651550292969, + "step": 560 + }, + { + "epoch": 5.905263157894737, + "grad_norm": 1.3087112620269181e-06, + "learning_rate": 0.00018835789473684212, + "logits/chosen": 13.200813293457031, + "logits/rejected": 13.200813293457031, + "logps/chosen": -4323.94140625, + "logps/rejected": -4323.94140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.20928955078125, + "rewards/margins": 0.0, + "rewards/rejected": -429.20928955078125, + "step": 561 + }, + { + "epoch": 5.91578947368421, + "grad_norm": 1.20750951282389e-06, + "learning_rate": 0.00018833684210526317, + "logits/chosen": 13.20124340057373, + "logits/rejected": 13.20124340057373, + "logps/chosen": -4880.8564453125, + "logps/rejected": -4880.8564453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2384948730469, + "rewards/margins": 0.0, + "rewards/rejected": -485.2384948730469, + "step": 562 + }, + { + "epoch": 5.926315789473684, + "grad_norm": 1.3961901004222455e-06, + "learning_rate": 0.00018831578947368422, + "logits/chosen": 13.191816329956055, + "logits/rejected": 13.191816329956055, + "logps/chosen": -3540.873046875, + "logps/rejected": -3540.873046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.0614318847656, + "rewards/margins": 0.0, + "rewards/rejected": -351.0614318847656, + "step": 563 + }, + { + "epoch": 5.936842105263158, + "grad_norm": 1.2689156392298173e-06, + "learning_rate": 0.00018829473684210527, + "logits/chosen": 13.211150169372559, + "logits/rejected": 13.211150169372559, + "logps/chosen": -4324.201171875, + "logps/rejected": -4324.201171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.2352600097656, + "rewards/margins": 0.0, + "rewards/rejected": -429.2352600097656, + "step": 564 + }, + { + "epoch": 5.947368421052632, + "grad_norm": 1.2820396477764007e-06, + "learning_rate": 0.00018827368421052635, + "logits/chosen": 13.198720932006836, + "logits/rejected": 13.198720932006836, + "logps/chosen": -3997.638671875, + "logps/rejected": -3997.638671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9365539550781, + "rewards/margins": 0.0, + "rewards/rejected": -396.9365539550781, + "step": 565 + }, + { + "epoch": 5.957894736842105, + "grad_norm": 1.4112313238001661e-06, + "learning_rate": 0.00018825263157894737, + "logits/chosen": 13.202353477478027, + "logits/rejected": 13.202353477478027, + "logps/chosen": -4287.662109375, + "logps/rejected": -4287.662109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9688415527344, + "rewards/margins": 0.0, + "rewards/rejected": -425.9688415527344, + "step": 566 + }, + { + "epoch": 5.968421052631579, + "grad_norm": 1.1650764690784854e-06, + "learning_rate": 0.00018823157894736842, + "logits/chosen": 13.19483757019043, + "logits/rejected": 13.19483757019043, + "logps/chosen": -3998.044921875, + "logps/rejected": -3998.044921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9771728515625, + "rewards/margins": 0.0, + "rewards/rejected": -396.9771728515625, + "step": 567 + }, + { + "epoch": 5.978947368421053, + "grad_norm": 1.1852655461552786e-06, + "learning_rate": 0.00018821052631578947, + "logits/chosen": 13.189155578613281, + "logits/rejected": 13.189155578613281, + "logps/chosen": -3541.6953125, + "logps/rejected": -3541.6953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1436462402344, + "rewards/margins": 0.0, + "rewards/rejected": -351.1436462402344, + "step": 568 + }, + { + "epoch": 5.989473684210527, + "grad_norm": 1.0672247299226e-06, + "learning_rate": 0.00018818947368421055, + "logits/chosen": 13.18425178527832, + "logits/rejected": 13.18425178527832, + "logps/chosen": -3541.740234375, + "logps/rejected": -3541.740234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1481628417969, + "rewards/margins": 0.0, + "rewards/rejected": -351.1481628417969, + "step": 569 + }, + { + "epoch": 6.0, + "grad_norm": 9.56930307438597e-07, + "learning_rate": 0.0001881684210526316, + "logits/chosen": 13.181585311889648, + "logits/rejected": 13.181585311889648, + "logps/chosen": -3542.328125, + "logps/rejected": -3542.328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2069396972656, + "rewards/margins": 0.0, + "rewards/rejected": -351.2069396972656, + "step": 570 + }, + { + "epoch": 6.010526315789473, + "grad_norm": 8.95854043392319e-07, + "learning_rate": 0.00018814736842105264, + "logits/chosen": 13.18505573272705, + "logits/rejected": 13.18505573272705, + "logps/chosen": -2967.0078125, + "logps/rejected": -2967.0078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9745788574219, + "rewards/margins": 0.0, + "rewards/rejected": -293.9745788574219, + "step": 571 + }, + { + "epoch": 6.021052631578947, + "grad_norm": 1.677127329458017e-06, + "learning_rate": 0.0001881263157894737, + "logits/chosen": 13.182439804077148, + "logits/rejected": 13.182439804077148, + "logps/chosen": -4287.9873046875, + "logps/rejected": -4287.9873046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0013732910156, + "rewards/margins": 0.0, + "rewards/rejected": -426.0013732910156, + "step": 572 + }, + { + "epoch": 6.031578947368421, + "grad_norm": 7.974176128300314e-07, + "learning_rate": 0.00018810526315789474, + "logits/chosen": 13.184743881225586, + "logits/rejected": 13.184743881225586, + "logps/chosen": -2967.509765625, + "logps/rejected": -2967.509765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0247802734375, + "rewards/margins": 0.0, + "rewards/rejected": -294.0247802734375, + "step": 573 + }, + { + "epoch": 6.042105263157895, + "grad_norm": 1.8033014157481375e-06, + "learning_rate": 0.0001880842105263158, + "logits/chosen": 13.193608283996582, + "logits/rejected": 13.193608283996582, + "logps/chosen": -4880.2822265625, + "logps/rejected": -4880.2822265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1810607910156, + "rewards/margins": 0.0, + "rewards/rejected": -485.1810607910156, + "step": 574 + }, + { + "epoch": 6.052631578947368, + "grad_norm": 9.459404282097239e-07, + "learning_rate": 0.00018806315789473684, + "logits/chosen": 13.190689086914062, + "logits/rejected": 13.190689086914062, + "logps/chosen": -3758.173828125, + "logps/rejected": -3758.173828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8990783691406, + "rewards/margins": 0.0, + "rewards/rejected": -372.8990783691406, + "step": 575 + }, + { + "epoch": 6.063157894736842, + "grad_norm": 1.5308801266655792e-06, + "learning_rate": 0.00018804210526315792, + "logits/chosen": 13.205794334411621, + "logits/rejected": 13.205794334411621, + "logps/chosen": -4326.60546875, + "logps/rejected": -4326.60546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4756774902344, + "rewards/margins": 0.0, + "rewards/rejected": -429.4756774902344, + "step": 576 + }, + { + "epoch": 6.073684210526316, + "grad_norm": 1.3134667824488133e-06, + "learning_rate": 0.00018802105263157897, + "logits/chosen": 13.210139274597168, + "logits/rejected": 13.210139274597168, + "logps/chosen": -4879.81494140625, + "logps/rejected": -4879.81494140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.13433837890625, + "rewards/margins": 0.0, + "rewards/rejected": -485.13433837890625, + "step": 577 + }, + { + "epoch": 6.08421052631579, + "grad_norm": 1.1836641533591319e-06, + "learning_rate": 0.000188, + "logits/chosen": 13.219319343566895, + "logits/rejected": 13.219319343566895, + "logps/chosen": -4326.8193359375, + "logps/rejected": -4326.8193359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4970703125, + "rewards/margins": 0.0, + "rewards/rejected": -429.4970703125, + "step": 578 + }, + { + "epoch": 6.094736842105263, + "grad_norm": 1.9596509446273558e-06, + "learning_rate": 0.00018797894736842107, + "logits/chosen": 13.209967613220215, + "logits/rejected": 13.209967613220215, + "logps/chosen": -3543.5205078125, + "logps/rejected": -3543.5205078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.326171875, + "rewards/margins": 0.0, + "rewards/rejected": -351.326171875, + "step": 579 + }, + { + "epoch": 6.105263157894737, + "grad_norm": 1.732796249598323e-06, + "learning_rate": 0.00018795789473684211, + "logits/chosen": 13.212206840515137, + "logits/rejected": 13.212206840515137, + "logps/chosen": -3998.2734375, + "logps/rejected": -3998.2734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0000305175781, + "rewards/margins": 0.0, + "rewards/rejected": -397.0000305175781, + "step": 580 + }, + { + "epoch": 6.11578947368421, + "grad_norm": 1.523095647826267e-06, + "learning_rate": 0.00018793684210526316, + "logits/chosen": 13.212565422058105, + "logits/rejected": 13.212565422058105, + "logps/chosen": -4288.8076171875, + "logps/rejected": -4288.8076171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0834045410156, + "rewards/margins": 0.0, + "rewards/rejected": -426.0834045410156, + "step": 581 + }, + { + "epoch": 6.126315789473685, + "grad_norm": 1.6778822100604884e-06, + "learning_rate": 0.0001879157894736842, + "logits/chosen": 13.23731803894043, + "logits/rejected": 13.23731803894043, + "logps/chosen": -5173.009765625, + "logps/rejected": -5173.009765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.362548828125, + "rewards/margins": 0.0, + "rewards/rejected": -514.362548828125, + "step": 582 + }, + { + "epoch": 6.136842105263158, + "grad_norm": 1.2378862948025926e-06, + "learning_rate": 0.0001878947368421053, + "logits/chosen": 13.20161247253418, + "logits/rejected": 13.20161247253418, + "logps/chosen": -3777.7470703125, + "logps/rejected": -3777.7470703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9056091308594, + "rewards/margins": 0.0, + "rewards/rejected": -374.9056091308594, + "step": 583 + }, + { + "epoch": 6.147368421052631, + "grad_norm": 9.320282288172166e-07, + "learning_rate": 0.00018787368421052634, + "logits/chosen": 13.207496643066406, + "logits/rejected": 13.207496643066406, + "logps/chosen": -2672.783203125, + "logps/rejected": -2672.783203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4967346191406, + "rewards/margins": 0.0, + "rewards/rejected": -264.4967346191406, + "step": 584 + }, + { + "epoch": 6.157894736842105, + "grad_norm": 8.268946203315863e-07, + "learning_rate": 0.00018785263157894736, + "logits/chosen": 13.189412117004395, + "logits/rejected": 13.189412117004395, + "logps/chosen": -3544.30859375, + "logps/rejected": -3544.30859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4049987792969, + "rewards/margins": 0.0, + "rewards/rejected": -351.4049987792969, + "step": 585 + }, + { + "epoch": 6.168421052631579, + "grad_norm": 1.178776187771291e-06, + "learning_rate": 0.00018783157894736844, + "logits/chosen": 13.191641807556152, + "logits/rejected": 13.191641807556152, + "logps/chosen": -3758.3203125, + "logps/rejected": -3758.3203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9137268066406, + "rewards/margins": 0.0, + "rewards/rejected": -372.9137268066406, + "step": 586 + }, + { + "epoch": 6.178947368421053, + "grad_norm": 3.134376356683788e-06, + "learning_rate": 0.0001878105263157895, + "logits/chosen": 13.220187187194824, + "logits/rejected": 13.220187187194824, + "logps/chosen": -5172.9501953125, + "logps/rejected": -5172.9501953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3566284179688, + "rewards/margins": 0.0, + "rewards/rejected": -514.3566284179688, + "step": 587 + }, + { + "epoch": 6.189473684210526, + "grad_norm": 9.482012615080748e-07, + "learning_rate": 0.00018778947368421054, + "logits/chosen": 13.199373245239258, + "logits/rejected": 13.199373245239258, + "logps/chosen": -3758.64453125, + "logps/rejected": -3758.64453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9461364746094, + "rewards/margins": 0.0, + "rewards/rejected": -372.9461364746094, + "step": 588 + }, + { + "epoch": 6.2, + "grad_norm": 1.042478515955736e-06, + "learning_rate": 0.00018776842105263159, + "logits/chosen": 13.214068412780762, + "logits/rejected": 13.214068412780762, + "logps/chosen": -2673.265625, + "logps/rejected": -2673.265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.54498291015625, + "rewards/margins": 0.0, + "rewards/rejected": -264.54498291015625, + "step": 589 + }, + { + "epoch": 6.2105263157894735, + "grad_norm": 1.3472722457663622e-06, + "learning_rate": 0.00018774736842105266, + "logits/chosen": 13.20860481262207, + "logits/rejected": 13.20860481262207, + "logps/chosen": -3777.83203125, + "logps/rejected": -3777.83203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9140930175781, + "rewards/margins": 0.0, + "rewards/rejected": -374.9140930175781, + "step": 590 + }, + { + "epoch": 6.221052631578948, + "grad_norm": 1.472659732826287e-06, + "learning_rate": 0.00018772631578947368, + "logits/chosen": 13.211287498474121, + "logits/rejected": 13.211287498474121, + "logps/chosen": -3777.96484375, + "logps/rejected": -3777.96484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9273681640625, + "rewards/margins": 0.0, + "rewards/rejected": -374.9273681640625, + "step": 591 + }, + { + "epoch": 6.231578947368421, + "grad_norm": 1.5024987760625663e-06, + "learning_rate": 0.00018770526315789473, + "logits/chosen": 13.221588134765625, + "logits/rejected": 13.221588134765625, + "logps/chosen": -2673.189453125, + "logps/rejected": -2673.189453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.537353515625, + "rewards/margins": 0.0, + "rewards/rejected": -264.537353515625, + "step": 592 + }, + { + "epoch": 6.242105263157895, + "grad_norm": 1.4282520623964956e-06, + "learning_rate": 0.00018768421052631578, + "logits/chosen": 13.205028533935547, + "logits/rejected": 13.205028533935547, + "logps/chosen": -3996.93359375, + "logps/rejected": -3996.93359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8660583496094, + "rewards/margins": 0.0, + "rewards/rejected": -396.8660583496094, + "step": 593 + }, + { + "epoch": 6.252631578947368, + "grad_norm": 1.1856502624141285e-06, + "learning_rate": 0.00018766315789473686, + "logits/chosen": 13.20934009552002, + "logits/rejected": 13.20934009552002, + "logps/chosen": -2673.9345703125, + "logps/rejected": -2673.9345703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.61187744140625, + "rewards/margins": 0.0, + "rewards/rejected": -264.61187744140625, + "step": 594 + }, + { + "epoch": 6.2631578947368425, + "grad_norm": 9.582586244505364e-07, + "learning_rate": 0.0001876421052631579, + "logits/chosen": 13.18975830078125, + "logits/rejected": 13.18975830078125, + "logps/chosen": -2969.1669921875, + "logps/rejected": -2969.1669921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1905212402344, + "rewards/margins": 0.0, + "rewards/rejected": -294.1905212402344, + "step": 595 + }, + { + "epoch": 6.273684210526316, + "grad_norm": 1.8523847984397435e-06, + "learning_rate": 0.00018762105263157896, + "logits/chosen": 13.210691452026367, + "logits/rejected": 13.210691452026367, + "logps/chosen": -5173.56396484375, + "logps/rejected": -5173.56396484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.41796875, + "rewards/margins": 0.0, + "rewards/rejected": -514.41796875, + "step": 596 + }, + { + "epoch": 6.284210526315789, + "grad_norm": 8.66443656377669e-07, + "learning_rate": 0.0001876, + "logits/chosen": 13.170330047607422, + "logits/rejected": 13.170330047607422, + "logps/chosen": -3544.0205078125, + "logps/rejected": -3544.0205078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3761901855469, + "rewards/margins": 0.0, + "rewards/rejected": -351.3761901855469, + "step": 597 + }, + { + "epoch": 6.294736842105263, + "grad_norm": 1.7496446389486664e-06, + "learning_rate": 0.00018757894736842106, + "logits/chosen": 13.168196678161621, + "logits/rejected": 13.168196678161621, + "logps/chosen": -3996.181640625, + "logps/rejected": -3996.181640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7908630371094, + "rewards/margins": 0.0, + "rewards/rejected": -396.7908630371094, + "step": 598 + }, + { + "epoch": 6.3052631578947365, + "grad_norm": 1.440081859982456e-06, + "learning_rate": 0.0001875578947368421, + "logits/chosen": 13.170392036437988, + "logits/rejected": 13.170392036437988, + "logps/chosen": -3779.1103515625, + "logps/rejected": -3779.1103515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.04193115234375, + "rewards/margins": 0.0, + "rewards/rejected": -375.04193115234375, + "step": 599 + }, + { + "epoch": 6.315789473684211, + "grad_norm": 1.4733492434970685e-06, + "learning_rate": 0.00018753684210526315, + "logits/chosen": 13.181976318359375, + "logits/rejected": 13.181976318359375, + "logps/chosen": -4876.5595703125, + "logps/rejected": -4876.5595703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.8088073730469, + "rewards/margins": 0.0, + "rewards/rejected": -484.8088073730469, + "step": 600 + }, + { + "epoch": 6.315789473684211, + "eval_logits/chosen": 13.191164016723633, + "eval_logits/rejected": 13.191164016723633, + "eval_logps/chosen": -4310.07666015625, + "eval_logps/rejected": -4310.07666015625, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.1044921875, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.1044921875, + "eval_runtime": 4.1623, + "eval_samples_per_second": 2.403, + "eval_steps_per_second": 2.403, + "step": 600 + }, + { + "epoch": 6.326315789473684, + "grad_norm": 1.0408135722173029e-06, + "learning_rate": 0.00018751578947368423, + "logits/chosen": 13.175030708312988, + "logits/rejected": 13.175030708312988, + "logps/chosen": -2968.7265625, + "logps/rejected": -2968.7265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1464538574219, + "rewards/margins": 0.0, + "rewards/rejected": -294.1464538574219, + "step": 601 + }, + { + "epoch": 6.336842105263158, + "grad_norm": 2.0444217625481542e-06, + "learning_rate": 0.00018749473684210528, + "logits/chosen": 13.209887504577637, + "logits/rejected": 13.209887504577637, + "logps/chosen": -5173.6474609375, + "logps/rejected": -5173.6474609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4263305664062, + "rewards/margins": 0.0, + "rewards/rejected": -514.4263305664062, + "step": 602 + }, + { + "epoch": 6.347368421052631, + "grad_norm": 1.2794157555617858e-06, + "learning_rate": 0.00018747368421052633, + "logits/chosen": 13.186957359313965, + "logits/rejected": 13.186957359313965, + "logps/chosen": -2674.6982421875, + "logps/rejected": -2674.6982421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.688232421875, + "rewards/margins": 0.0, + "rewards/rejected": -264.688232421875, + "step": 603 + }, + { + "epoch": 6.3578947368421055, + "grad_norm": 1.315401846113673e-06, + "learning_rate": 0.00018745263157894738, + "logits/chosen": 13.173993110656738, + "logits/rejected": 13.173993110656738, + "logps/chosen": -3543.5556640625, + "logps/rejected": -3543.5556640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3296813964844, + "rewards/margins": 0.0, + "rewards/rejected": -351.3296813964844, + "step": 604 + }, + { + "epoch": 6.368421052631579, + "grad_norm": 1.1277600151515799e-06, + "learning_rate": 0.00018743157894736843, + "logits/chosen": 13.173890113830566, + "logits/rejected": 13.173890113830566, + "logps/chosen": -3543.80859375, + "logps/rejected": -3543.80859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.35498046875, + "rewards/margins": 0.0, + "rewards/rejected": -351.35498046875, + "step": 605 + }, + { + "epoch": 6.378947368421053, + "grad_norm": 1.1660972631943878e-06, + "learning_rate": 0.00018741052631578948, + "logits/chosen": 13.173995018005371, + "logits/rejected": 13.173995018005371, + "logps/chosen": -3996.23828125, + "logps/rejected": -3996.23828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7965087890625, + "rewards/margins": 0.0, + "rewards/rejected": -396.7965087890625, + "step": 606 + }, + { + "epoch": 6.389473684210526, + "grad_norm": 1.1859199275932042e-06, + "learning_rate": 0.00018738947368421053, + "logits/chosen": 13.171222686767578, + "logits/rejected": 13.171222686767578, + "logps/chosen": -3996.486328125, + "logps/rejected": -3996.486328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8213195800781, + "rewards/margins": 0.0, + "rewards/rejected": -396.8213195800781, + "step": 607 + }, + { + "epoch": 6.4, + "grad_norm": 1.2157888704678044e-06, + "learning_rate": 0.0001873684210526316, + "logits/chosen": 13.165254592895508, + "logits/rejected": 13.165254592895508, + "logps/chosen": -3996.791015625, + "logps/rejected": -3996.791015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8517761230469, + "rewards/margins": 0.0, + "rewards/rejected": -396.8517761230469, + "step": 608 + }, + { + "epoch": 6.410526315789474, + "grad_norm": 1.221074739987671e-06, + "learning_rate": 0.00018734736842105265, + "logits/chosen": 13.156908988952637, + "logits/rejected": 13.156908988952637, + "logps/chosen": -3997.234375, + "logps/rejected": -3997.234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8961181640625, + "rewards/margins": 0.0, + "rewards/rejected": -396.8961181640625, + "step": 609 + }, + { + "epoch": 6.421052631578947, + "grad_norm": 1.5741740071462118e-06, + "learning_rate": 0.00018732631578947367, + "logits/chosen": 13.18421459197998, + "logits/rejected": 13.18421459197998, + "logps/chosen": -5173.595703125, + "logps/rejected": -5173.595703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.421142578125, + "rewards/margins": 0.0, + "rewards/rejected": -514.421142578125, + "step": 610 + }, + { + "epoch": 6.431578947368421, + "grad_norm": 9.437316634830495e-07, + "learning_rate": 0.00018730526315789475, + "logits/chosen": 13.148723602294922, + "logits/rejected": 13.148723602294922, + "logps/chosen": -3758.5625, + "logps/rejected": -3758.5625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9379577636719, + "rewards/margins": 0.0, + "rewards/rejected": -372.9379577636719, + "step": 611 + }, + { + "epoch": 6.442105263157894, + "grad_norm": 1.4082871757636894e-06, + "learning_rate": 0.0001872842105263158, + "logits/chosen": 13.153526306152344, + "logits/rejected": 13.153526306152344, + "logps/chosen": -4877.2158203125, + "logps/rejected": -4877.2158203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.8744201660156, + "rewards/margins": 0.0, + "rewards/rejected": -484.8744201660156, + "step": 612 + }, + { + "epoch": 6.4526315789473685, + "grad_norm": 1.0770583003250067e-06, + "learning_rate": 0.00018726315789473685, + "logits/chosen": 13.135787963867188, + "logits/rejected": 13.135787963867188, + "logps/chosen": -3998.919921875, + "logps/rejected": -3998.919921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0646667480469, + "rewards/margins": 0.0, + "rewards/rejected": -397.0646667480469, + "step": 613 + }, + { + "epoch": 6.463157894736842, + "grad_norm": 1.1356005416018888e-06, + "learning_rate": 0.0001872421052631579, + "logits/chosen": 13.141548156738281, + "logits/rejected": 13.141548156738281, + "logps/chosen": -2674.537109375, + "logps/rejected": -2674.537109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.672119140625, + "rewards/margins": 0.0, + "rewards/rejected": -264.672119140625, + "step": 614 + }, + { + "epoch": 6.473684210526316, + "grad_norm": 1.0908431704592658e-06, + "learning_rate": 0.00018722105263157897, + "logits/chosen": 13.12234878540039, + "logits/rejected": 13.12234878540039, + "logps/chosen": -3999.23828125, + "logps/rejected": -3999.23828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0965270996094, + "rewards/margins": 0.0, + "rewards/rejected": -397.0965270996094, + "step": 615 + }, + { + "epoch": 6.484210526315789, + "grad_norm": 1.038601681102591e-06, + "learning_rate": 0.00018720000000000002, + "logits/chosen": 13.111235618591309, + "logits/rejected": 13.111235618591309, + "logps/chosen": -3999.603515625, + "logps/rejected": -3999.603515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1330261230469, + "rewards/margins": 0.0, + "rewards/rejected": -397.1330261230469, + "step": 616 + }, + { + "epoch": 6.494736842105263, + "grad_norm": 9.794709967536619e-07, + "learning_rate": 0.00018717894736842105, + "logits/chosen": 13.097511291503906, + "logits/rejected": 13.097511291503906, + "logps/chosen": -4000.416015625, + "logps/rejected": -4000.416015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.21429443359375, + "rewards/margins": 0.0, + "rewards/rejected": -397.21429443359375, + "step": 617 + }, + { + "epoch": 6.505263157894737, + "grad_norm": 1.4964347201384953e-06, + "learning_rate": 0.00018715789473684212, + "logits/chosen": 13.097776412963867, + "logits/rejected": 13.097776412963867, + "logps/chosen": -4877.37451171875, + "logps/rejected": -4877.37451171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.8902893066406, + "rewards/margins": 0.0, + "rewards/rejected": -484.8902893066406, + "step": 618 + }, + { + "epoch": 6.515789473684211, + "grad_norm": 1.6902042716537835e-06, + "learning_rate": 0.00018713684210526317, + "logits/chosen": 13.107996940612793, + "logits/rejected": 13.107996940612793, + "logps/chosen": -5172.5341796875, + "logps/rejected": -5172.5341796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3150024414062, + "rewards/margins": 0.0, + "rewards/rejected": -514.3150024414062, + "step": 619 + }, + { + "epoch": 6.526315789473684, + "grad_norm": 1.4707082982567954e-06, + "learning_rate": 0.00018711578947368422, + "logits/chosen": 13.081208229064941, + "logits/rejected": 13.081208229064941, + "logps/chosen": -4877.89501953125, + "logps/rejected": -4877.89501953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9423522949219, + "rewards/margins": 0.0, + "rewards/rejected": -484.9423522949219, + "step": 620 + }, + { + "epoch": 6.536842105263158, + "grad_norm": 1.382074970024405e-06, + "learning_rate": 0.00018709473684210527, + "logits/chosen": 13.06719970703125, + "logits/rejected": 13.06719970703125, + "logps/chosen": -4286.072265625, + "logps/rejected": -4286.072265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.80987548828125, + "rewards/margins": 0.0, + "rewards/rejected": -425.80987548828125, + "step": 621 + }, + { + "epoch": 6.5473684210526315, + "grad_norm": 8.750578217586735e-07, + "learning_rate": 0.00018707368421052635, + "logits/chosen": 13.063549995422363, + "logits/rejected": 13.063549995422363, + "logps/chosen": -4002.49609375, + "logps/rejected": -4002.49609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.42230224609375, + "rewards/margins": 0.0, + "rewards/rejected": -397.42230224609375, + "step": 622 + }, + { + "epoch": 6.557894736842105, + "grad_norm": 1.558595840833732e-06, + "learning_rate": 0.00018705263157894737, + "logits/chosen": 13.076499938964844, + "logits/rejected": 13.076499938964844, + "logps/chosen": -4878.43505859375, + "logps/rejected": -4878.43505859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.996337890625, + "rewards/margins": 0.0, + "rewards/rejected": -484.996337890625, + "step": 623 + }, + { + "epoch": 6.568421052631579, + "grad_norm": 1.5447517398570199e-06, + "learning_rate": 0.00018703157894736842, + "logits/chosen": 13.07684326171875, + "logits/rejected": 13.07684326171875, + "logps/chosen": -4878.20654296875, + "logps/rejected": -4878.20654296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9734802246094, + "rewards/margins": 0.0, + "rewards/rejected": -484.9734802246094, + "step": 624 + }, + { + "epoch": 6.578947368421053, + "grad_norm": 1.1281501883786405e-06, + "learning_rate": 0.00018701052631578947, + "logits/chosen": 13.074007987976074, + "logits/rejected": 13.074007987976074, + "logps/chosen": -2672.408203125, + "logps/rejected": -2672.408203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.459228515625, + "rewards/margins": 0.0, + "rewards/rejected": -264.459228515625, + "step": 625 + }, + { + "epoch": 6.589473684210526, + "grad_norm": 8.956453143582621e-07, + "learning_rate": 0.00018698947368421054, + "logits/chosen": 13.063782691955566, + "logits/rejected": 13.063782691955566, + "logps/chosen": -4002.556640625, + "logps/rejected": -4002.556640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.4283447265625, + "rewards/margins": 0.0, + "rewards/rejected": -397.4283447265625, + "step": 626 + }, + { + "epoch": 6.6, + "grad_norm": 1.1431949360485305e-06, + "learning_rate": 0.0001869684210526316, + "logits/chosen": 13.061800003051758, + "logits/rejected": 13.061800003051758, + "logps/chosen": -3539.251953125, + "logps/rejected": -3539.251953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.8993225097656, + "rewards/margins": 0.0, + "rewards/rejected": -350.8993225097656, + "step": 627 + }, + { + "epoch": 6.610526315789474, + "grad_norm": 1.4119310662863427e-06, + "learning_rate": 0.00018694736842105264, + "logits/chosen": 13.06440544128418, + "logits/rejected": 13.06440544128418, + "logps/chosen": -4285.8671875, + "logps/rejected": -4285.8671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.78936767578125, + "rewards/margins": 0.0, + "rewards/rejected": -425.78936767578125, + "step": 628 + }, + { + "epoch": 6.621052631578947, + "grad_norm": 1.0821867135746288e-06, + "learning_rate": 0.0001869263157894737, + "logits/chosen": 13.0635404586792, + "logits/rejected": 13.0635404586792, + "logps/chosen": -3539.0703125, + "logps/rejected": -3539.0703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.88116455078125, + "rewards/margins": 0.0, + "rewards/rejected": -350.88116455078125, + "step": 629 + }, + { + "epoch": 6.631578947368421, + "grad_norm": 1.7470437114752713e-06, + "learning_rate": 0.00018690526315789474, + "logits/chosen": 13.084620475769043, + "logits/rejected": 13.084620475769043, + "logps/chosen": -4321.90234375, + "logps/rejected": -4321.90234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.00537109375, + "rewards/margins": 0.0, + "rewards/rejected": -429.00537109375, + "step": 630 + }, + { + "epoch": 6.6421052631578945, + "grad_norm": 1.7168232488984358e-06, + "learning_rate": 0.0001868842105263158, + "logits/chosen": 13.114235877990723, + "logits/rejected": 13.114235877990723, + "logps/chosen": -5172.373046875, + "logps/rejected": -5172.373046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.2988891601562, + "rewards/margins": 0.0, + "rewards/rejected": -514.2988891601562, + "step": 631 + }, + { + "epoch": 6.652631578947369, + "grad_norm": 1.4030877082404913e-06, + "learning_rate": 0.00018686315789473684, + "logits/chosen": 13.106715202331543, + "logits/rejected": 13.106715202331543, + "logps/chosen": -4322.400390625, + "logps/rejected": -4322.400390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.05517578125, + "rewards/margins": 0.0, + "rewards/rejected": -429.05517578125, + "step": 632 + }, + { + "epoch": 6.663157894736842, + "grad_norm": 1.5272150903911097e-06, + "learning_rate": 0.00018684210526315792, + "logits/chosen": 13.105968475341797, + "logits/rejected": 13.105968475341797, + "logps/chosen": -4286.4130859375, + "logps/rejected": -4286.4130859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.8439636230469, + "rewards/margins": 0.0, + "rewards/rejected": -425.8439636230469, + "step": 633 + }, + { + "epoch": 6.673684210526316, + "grad_norm": 1.843278937485593e-06, + "learning_rate": 0.00018682105263157896, + "logits/chosen": 13.114410400390625, + "logits/rejected": 13.114410400390625, + "logps/chosen": -3539.841796875, + "logps/rejected": -3539.841796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.95831298828125, + "rewards/margins": 0.0, + "rewards/rejected": -350.95831298828125, + "step": 634 + }, + { + "epoch": 6.684210526315789, + "grad_norm": 1.5347978887803038e-06, + "learning_rate": 0.00018680000000000001, + "logits/chosen": 13.126788139343262, + "logits/rejected": 13.126788139343262, + "logps/chosen": -4287.0087890625, + "logps/rejected": -4287.0087890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9035339355469, + "rewards/margins": 0.0, + "rewards/rejected": -425.9035339355469, + "step": 635 + }, + { + "epoch": 6.6947368421052635, + "grad_norm": 1.3687921409655246e-06, + "learning_rate": 0.00018677894736842106, + "logits/chosen": 13.130768775939941, + "logits/rejected": 13.130768775939941, + "logps/chosen": -3540.142578125, + "logps/rejected": -3540.142578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.9883728027344, + "rewards/margins": 0.0, + "rewards/rejected": -350.9883728027344, + "step": 636 + }, + { + "epoch": 6.705263157894737, + "grad_norm": 1.0600799669191474e-06, + "learning_rate": 0.0001867578947368421, + "logits/chosen": 13.140104293823242, + "logits/rejected": 13.140104293823242, + "logps/chosen": -2966.134765625, + "logps/rejected": -2966.134765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.8872985839844, + "rewards/margins": 0.0, + "rewards/rejected": -293.8872985839844, + "step": 637 + }, + { + "epoch": 6.715789473684211, + "grad_norm": 9.262695925826847e-07, + "learning_rate": 0.00018673684210526316, + "logits/chosen": 13.153818130493164, + "logits/rejected": 13.153818130493164, + "logps/chosen": -2672.134765625, + "logps/rejected": -2672.134765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.431884765625, + "rewards/margins": 0.0, + "rewards/rejected": -264.431884765625, + "step": 638 + }, + { + "epoch": 6.726315789473684, + "grad_norm": 1.5997011360013857e-06, + "learning_rate": 0.0001867157894736842, + "logits/chosen": 13.146134376525879, + "logits/rejected": 13.146134376525879, + "logps/chosen": -4000.60546875, + "logps/rejected": -4000.60546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.2332458496094, + "rewards/margins": 0.0, + "rewards/rejected": -397.2332458496094, + "step": 639 + }, + { + "epoch": 6.7368421052631575, + "grad_norm": 2.5365302462887485e-06, + "learning_rate": 0.0001866947368421053, + "logits/chosen": 13.165045738220215, + "logits/rejected": 13.165045738220215, + "logps/chosen": -4878.8984375, + "logps/rejected": -4878.8984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0426940917969, + "rewards/margins": 0.0, + "rewards/rejected": -485.0426940917969, + "step": 640 + }, + { + "epoch": 6.747368421052632, + "grad_norm": 1.5845118923607515e-06, + "learning_rate": 0.00018667368421052634, + "logits/chosen": 13.175384521484375, + "logits/rejected": 13.175384521484375, + "logps/chosen": -4879.02587890625, + "logps/rejected": -4879.02587890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.055419921875, + "rewards/margins": 0.0, + "rewards/rejected": -485.055419921875, + "step": 641 + }, + { + "epoch": 6.757894736842105, + "grad_norm": 1.344304223493964e-06, + "learning_rate": 0.00018665263157894736, + "logits/chosen": 13.19092845916748, + "logits/rejected": 13.19092845916748, + "logps/chosen": -4325.8671875, + "logps/rejected": -4325.8671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.40185546875, + "rewards/margins": 0.0, + "rewards/rejected": -429.40185546875, + "step": 642 + }, + { + "epoch": 6.768421052631579, + "grad_norm": 3.5879138522432186e-06, + "learning_rate": 0.00018663157894736844, + "logits/chosen": 13.225683212280273, + "logits/rejected": 13.225683212280273, + "logps/chosen": -5173.5927734375, + "logps/rejected": -5173.5927734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4208374023438, + "rewards/margins": 0.0, + "rewards/rejected": -514.4208374023438, + "step": 643 + }, + { + "epoch": 6.778947368421052, + "grad_norm": 3.543569619068876e-06, + "learning_rate": 0.00018661052631578948, + "logits/chosen": 13.232131004333496, + "logits/rejected": 13.232131004333496, + "logps/chosen": -5173.76513671875, + "logps/rejected": -5173.76513671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4381103515625, + "rewards/margins": 0.0, + "rewards/rejected": -514.4381103515625, + "step": 644 + }, + { + "epoch": 6.7894736842105265, + "grad_norm": 1.3645823173646932e-06, + "learning_rate": 0.00018658947368421053, + "logits/chosen": 13.19379997253418, + "logits/rejected": 13.19379997253418, + "logps/chosen": -3775.4765625, + "logps/rejected": -3775.4765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.6785583496094, + "rewards/margins": 0.0, + "rewards/rejected": -374.6785583496094, + "step": 645 + }, + { + "epoch": 6.8, + "grad_norm": 8.641389968033764e-07, + "learning_rate": 0.00018656842105263158, + "logits/chosen": 13.200854301452637, + "logits/rejected": 13.200854301452637, + "logps/chosen": -2672.62890625, + "logps/rejected": -2672.62890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4812927246094, + "rewards/margins": 0.0, + "rewards/rejected": -264.4812927246094, + "step": 646 + }, + { + "epoch": 6.810526315789474, + "grad_norm": 7.960022117003973e-07, + "learning_rate": 0.00018654736842105266, + "logits/chosen": 13.188977241516113, + "logits/rejected": 13.188977241516113, + "logps/chosen": -2967.58984375, + "logps/rejected": -2967.58984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0328063964844, + "rewards/margins": 0.0, + "rewards/rejected": -294.0328063964844, + "step": 647 + }, + { + "epoch": 6.821052631578947, + "grad_norm": 5.177711500436999e-06, + "learning_rate": 0.00018652631578947368, + "logits/chosen": 13.22115421295166, + "logits/rejected": 13.22115421295166, + "logps/chosen": -5174.9287109375, + "logps/rejected": -5174.9287109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.554443359375, + "rewards/margins": 0.0, + "rewards/rejected": -514.554443359375, + "step": 648 + }, + { + "epoch": 6.831578947368421, + "grad_norm": 1.72847023804934e-06, + "learning_rate": 0.00018650526315789473, + "logits/chosen": 13.200213432312012, + "logits/rejected": 13.200213432312012, + "logps/chosen": -3775.712890625, + "logps/rejected": -3775.712890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7021789550781, + "rewards/margins": 0.0, + "rewards/rejected": -374.7021789550781, + "step": 649 + }, + { + "epoch": 6.842105263157895, + "grad_norm": 9.512602332506503e-07, + "learning_rate": 0.0001864842105263158, + "logits/chosen": 13.214202880859375, + "logits/rejected": 13.214202880859375, + "logps/chosen": -3542.818359375, + "logps/rejected": -3542.818359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2559509277344, + "rewards/margins": 0.0, + "rewards/rejected": -351.2559509277344, + "step": 650 + }, + { + "epoch": 6.842105263157895, + "eval_logits/chosen": 13.250373840332031, + "eval_logits/rejected": 13.250373840332031, + "eval_logps/chosen": -4310.6591796875, + "eval_logps/rejected": -4310.6591796875, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.1627502441406, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.1627502441406, + "eval_runtime": 4.4113, + "eval_samples_per_second": 2.267, + "eval_steps_per_second": 2.267, + "step": 650 + }, + { + "epoch": 6.852631578947369, + "grad_norm": 1.7020762470565387e-06, + "learning_rate": 0.00018646315789473686, + "logits/chosen": 13.229392051696777, + "logits/rejected": 13.229392051696777, + "logps/chosen": -3543.119140625, + "logps/rejected": -3543.119140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2860412597656, + "rewards/margins": 0.0, + "rewards/rejected": -351.2860412597656, + "step": 651 + }, + { + "epoch": 6.863157894736842, + "grad_norm": 3.6161384286970133e-06, + "learning_rate": 0.0001864421052631579, + "logits/chosen": 13.259687423706055, + "logits/rejected": 13.259687423706055, + "logps/chosen": -4879.25537109375, + "logps/rejected": -4879.25537109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.078369140625, + "rewards/margins": 0.0, + "rewards/rejected": -485.078369140625, + "step": 652 + }, + { + "epoch": 6.873684210526315, + "grad_norm": 3.063306849071523e-06, + "learning_rate": 0.00018642105263157896, + "logits/chosen": 13.283510208129883, + "logits/rejected": 13.283510208129883, + "logps/chosen": -5176.0205078125, + "logps/rejected": -5176.0205078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6636352539062, + "rewards/margins": 0.0, + "rewards/rejected": -514.6636352539062, + "step": 653 + }, + { + "epoch": 6.88421052631579, + "grad_norm": 1.32214620407467e-06, + "learning_rate": 0.00018640000000000003, + "logits/chosen": 13.257993698120117, + "logits/rejected": 13.257993698120117, + "logps/chosen": -4879.55810546875, + "logps/rejected": -4879.55810546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.108642578125, + "rewards/margins": 0.0, + "rewards/rejected": -485.108642578125, + "step": 654 + }, + { + "epoch": 6.894736842105263, + "grad_norm": 2.4693399609532207e-06, + "learning_rate": 0.00018637894736842105, + "logits/chosen": 13.275491714477539, + "logits/rejected": 13.275491714477539, + "logps/chosen": -5176.6357421875, + "logps/rejected": -5176.6357421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7251586914062, + "rewards/margins": 0.0, + "rewards/rejected": -514.7251586914062, + "step": 655 + }, + { + "epoch": 6.905263157894737, + "grad_norm": 1.0873590099436115e-06, + "learning_rate": 0.0001863578947368421, + "logits/chosen": 13.24202823638916, + "logits/rejected": 13.24202823638916, + "logps/chosen": -3543.806640625, + "logps/rejected": -3543.806640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.35479736328125, + "rewards/margins": 0.0, + "rewards/rejected": -351.35479736328125, + "step": 656 + }, + { + "epoch": 6.91578947368421, + "grad_norm": 2.6836864890356082e-06, + "learning_rate": 0.00018633684210526315, + "logits/chosen": 13.268362998962402, + "logits/rejected": 13.268362998962402, + "logps/chosen": -4878.84375, + "logps/rejected": -4878.84375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0372009277344, + "rewards/margins": 0.0, + "rewards/rejected": -485.0372009277344, + "step": 657 + }, + { + "epoch": 6.926315789473684, + "grad_norm": 1.329520387116645e-06, + "learning_rate": 0.00018631578947368423, + "logits/chosen": 13.268468856811523, + "logits/rejected": 13.268468856811523, + "logps/chosen": -3776.91796875, + "logps/rejected": -3776.91796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.82269287109375, + "rewards/margins": 0.0, + "rewards/rejected": -374.82269287109375, + "step": 658 + }, + { + "epoch": 6.936842105263158, + "grad_norm": 1.1006051181539078e-06, + "learning_rate": 0.00018629473684210528, + "logits/chosen": 13.280906677246094, + "logits/rejected": 13.280906677246094, + "logps/chosen": -3544.0625, + "logps/rejected": -3544.0625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.38037109375, + "rewards/margins": 0.0, + "rewards/rejected": -351.38037109375, + "step": 659 + }, + { + "epoch": 6.947368421052632, + "grad_norm": 2.2968833945924416e-06, + "learning_rate": 0.00018627368421052633, + "logits/chosen": 13.311330795288086, + "logits/rejected": 13.311330795288086, + "logps/chosen": -4880.15869140625, + "logps/rejected": -4880.15869140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.168701171875, + "rewards/margins": 0.0, + "rewards/rejected": -485.168701171875, + "step": 660 + }, + { + "epoch": 6.957894736842105, + "grad_norm": 1.642505139898276e-06, + "learning_rate": 0.00018625263157894738, + "logits/chosen": 13.310528755187988, + "logits/rejected": 13.310528755187988, + "logps/chosen": -2671.78125, + "logps/rejected": -2671.78125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.39654541015625, + "rewards/margins": 0.0, + "rewards/rejected": -264.39654541015625, + "step": 661 + }, + { + "epoch": 6.968421052631579, + "grad_norm": 1.7499687601230107e-06, + "learning_rate": 0.00018623157894736843, + "logits/chosen": 13.307360649108887, + "logits/rejected": 13.307360649108887, + "logps/chosen": -3756.353515625, + "logps/rejected": -3756.353515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.717041015625, + "rewards/margins": 0.0, + "rewards/rejected": -372.717041015625, + "step": 662 + }, + { + "epoch": 6.978947368421053, + "grad_norm": 9.157689646599465e-07, + "learning_rate": 0.00018621052631578947, + "logits/chosen": 13.295308113098145, + "logits/rejected": 13.295308113098145, + "logps/chosen": -3544.298828125, + "logps/rejected": -3544.298828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4040222167969, + "rewards/margins": 0.0, + "rewards/rejected": -351.4040222167969, + "step": 663 + }, + { + "epoch": 6.989473684210527, + "grad_norm": 1.0724845651566284e-06, + "learning_rate": 0.00018618947368421052, + "logits/chosen": 13.297536849975586, + "logits/rejected": 13.297536849975586, + "logps/chosen": -3756.572265625, + "logps/rejected": -3756.572265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.7389221191406, + "rewards/margins": 0.0, + "rewards/rejected": -372.7389221191406, + "step": 664 + }, + { + "epoch": 7.0, + "grad_norm": 1.8944460862257984e-06, + "learning_rate": 0.0001861684210526316, + "logits/chosen": 13.286480903625488, + "logits/rejected": 13.286480903625488, + "logps/chosen": -3993.939453125, + "logps/rejected": -3993.939453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.5666198730469, + "rewards/margins": 0.0, + "rewards/rejected": -396.5666198730469, + "step": 665 + }, + { + "epoch": 7.010526315789473, + "grad_norm": 2.970632522192318e-06, + "learning_rate": 0.00018614736842105265, + "logits/chosen": 13.296331405639648, + "logits/rejected": 13.296331405639648, + "logps/chosen": -4880.51708984375, + "logps/rejected": -4880.51708984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2045593261719, + "rewards/margins": 0.0, + "rewards/rejected": -485.2045593261719, + "step": 666 + }, + { + "epoch": 7.021052631578947, + "grad_norm": 8.019470101316983e-07, + "learning_rate": 0.0001861263157894737, + "logits/chosen": 13.281067848205566, + "logits/rejected": 13.281067848205566, + "logps/chosen": -3544.388671875, + "logps/rejected": -3544.388671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4129943847656, + "rewards/margins": 0.0, + "rewards/rejected": -351.4129943847656, + "step": 667 + }, + { + "epoch": 7.031578947368421, + "grad_norm": 1.278908030144521e-06, + "learning_rate": 0.00018610526315789475, + "logits/chosen": 13.323651313781738, + "logits/rejected": 13.323651313781738, + "logps/chosen": -5178.09765625, + "logps/rejected": -5178.09765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.871337890625, + "rewards/margins": 0.0, + "rewards/rejected": -514.871337890625, + "step": 668 + }, + { + "epoch": 7.042105263157895, + "grad_norm": 1.986194774872274e-06, + "learning_rate": 0.0001860842105263158, + "logits/chosen": 13.32851505279541, + "logits/rejected": 13.32851505279541, + "logps/chosen": -5177.79296875, + "logps/rejected": -5177.79296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.8408813476562, + "rewards/margins": 0.0, + "rewards/rejected": -514.8408813476562, + "step": 669 + }, + { + "epoch": 7.052631578947368, + "grad_norm": 1.6273856999760028e-06, + "learning_rate": 0.00018606315789473685, + "logits/chosen": 13.29207706451416, + "logits/rejected": 13.29207706451416, + "logps/chosen": -3544.349609375, + "logps/rejected": -3544.349609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4090881347656, + "rewards/margins": 0.0, + "rewards/rejected": -351.4090881347656, + "step": 670 + }, + { + "epoch": 7.063157894736842, + "grad_norm": 1.5197899756458355e-06, + "learning_rate": 0.0001860421052631579, + "logits/chosen": 13.292393684387207, + "logits/rejected": 13.292393684387207, + "logps/chosen": -3544.490234375, + "logps/rejected": -3544.490234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.42315673828125, + "rewards/margins": 0.0, + "rewards/rejected": -351.42315673828125, + "step": 671 + }, + { + "epoch": 7.073684210526316, + "grad_norm": 1.2948888752362109e-06, + "learning_rate": 0.00018602105263157897, + "logits/chosen": 13.297388076782227, + "logits/rejected": 13.297388076782227, + "logps/chosen": -3757.548828125, + "logps/rejected": -3757.548828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8365783691406, + "rewards/margins": 0.0, + "rewards/rejected": -372.8365783691406, + "step": 672 + }, + { + "epoch": 7.08421052631579, + "grad_norm": 1.240227788912307e-06, + "learning_rate": 0.00018600000000000002, + "logits/chosen": 13.324767112731934, + "logits/rejected": 13.324767112731934, + "logps/chosen": -5178.626953125, + "logps/rejected": -5178.626953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.9242553710938, + "rewards/margins": 0.0, + "rewards/rejected": -514.9242553710938, + "step": 673 + }, + { + "epoch": 7.094736842105263, + "grad_norm": 1.3515210639525321e-06, + "learning_rate": 0.00018597894736842104, + "logits/chosen": 13.28498649597168, + "logits/rejected": 13.28498649597168, + "logps/chosen": -3776.9296875, + "logps/rejected": -3776.9296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8238525390625, + "rewards/margins": 0.0, + "rewards/rejected": -374.8238525390625, + "step": 674 + }, + { + "epoch": 7.105263157894737, + "grad_norm": 1.4354501445268397e-06, + "learning_rate": 0.00018595789473684212, + "logits/chosen": 13.283524513244629, + "logits/rejected": 13.283524513244629, + "logps/chosen": -3994.044921875, + "logps/rejected": -3994.044921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.5771789550781, + "rewards/margins": 0.0, + "rewards/rejected": -396.5771789550781, + "step": 675 + }, + { + "epoch": 7.11578947368421, + "grad_norm": 1.4314811096483027e-06, + "learning_rate": 0.00018593684210526317, + "logits/chosen": 13.27773666381836, + "logits/rejected": 13.27773666381836, + "logps/chosen": -3776.982421875, + "logps/rejected": -3776.982421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8291320800781, + "rewards/margins": 0.0, + "rewards/rejected": -374.8291320800781, + "step": 676 + }, + { + "epoch": 7.126315789473685, + "grad_norm": 1.3188907814765116e-06, + "learning_rate": 0.00018591578947368422, + "logits/chosen": 13.273841857910156, + "logits/rejected": 13.273841857910156, + "logps/chosen": -3993.76171875, + "logps/rejected": -3993.76171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.5488586425781, + "rewards/margins": 0.0, + "rewards/rejected": -396.5488586425781, + "step": 677 + }, + { + "epoch": 7.136842105263158, + "grad_norm": 1.1867692819578224e-06, + "learning_rate": 0.00018589473684210527, + "logits/chosen": 13.283470153808594, + "logits/rejected": 13.283470153808594, + "logps/chosen": -4879.34814453125, + "logps/rejected": -4879.34814453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.087646484375, + "rewards/margins": 0.0, + "rewards/rejected": -485.087646484375, + "step": 678 + }, + { + "epoch": 7.147368421052631, + "grad_norm": 1.3514687680071802e-06, + "learning_rate": 0.00018587368421052634, + "logits/chosen": 13.277853965759277, + "logits/rejected": 13.277853965759277, + "logps/chosen": -4325.521484375, + "logps/rejected": -4325.521484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.3672790527344, + "rewards/margins": 0.0, + "rewards/rejected": -429.3672790527344, + "step": 679 + }, + { + "epoch": 7.157894736842105, + "grad_norm": 1.7907515257320483e-06, + "learning_rate": 0.00018585263157894737, + "logits/chosen": 13.255786895751953, + "logits/rejected": 13.255786895751953, + "logps/chosen": -3995.244140625, + "logps/rejected": -3995.244140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.6971130371094, + "rewards/margins": 0.0, + "rewards/rejected": -396.6971130371094, + "step": 680 + }, + { + "epoch": 7.168421052631579, + "grad_norm": 1.462976570110186e-06, + "learning_rate": 0.00018583157894736842, + "logits/chosen": 13.25105094909668, + "logits/rejected": 13.25105094909668, + "logps/chosen": -2671.18359375, + "logps/rejected": -2671.18359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.3367614746094, + "rewards/margins": 0.0, + "rewards/rejected": -264.3367614746094, + "step": 681 + }, + { + "epoch": 7.178947368421053, + "grad_norm": 1.485650273025385e-06, + "learning_rate": 0.0001858105263157895, + "logits/chosen": 13.227911949157715, + "logits/rejected": 13.227911949157715, + "logps/chosen": -3777.5625, + "logps/rejected": -3777.5625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.88714599609375, + "rewards/margins": 0.0, + "rewards/rejected": -374.88714599609375, + "step": 682 + }, + { + "epoch": 7.189473684210526, + "grad_norm": 1.1892149132108898e-06, + "learning_rate": 0.00018578947368421054, + "logits/chosen": 13.217670440673828, + "logits/rejected": 13.217670440673828, + "logps/chosen": -3758.11328125, + "logps/rejected": -3758.11328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8930358886719, + "rewards/margins": 0.0, + "rewards/rejected": -372.8930358886719, + "step": 683 + }, + { + "epoch": 7.2, + "grad_norm": 9.843425914368709e-07, + "learning_rate": 0.0001857684210526316, + "logits/chosen": 13.203829765319824, + "logits/rejected": 13.203829765319824, + "logps/chosen": -2671.970703125, + "logps/rejected": -2671.970703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4154968261719, + "rewards/margins": 0.0, + "rewards/rejected": -264.4154968261719, + "step": 684 + }, + { + "epoch": 7.2105263157894735, + "grad_norm": 8.688029993209057e-07, + "learning_rate": 0.00018574736842105264, + "logits/chosen": 13.187112808227539, + "logits/rejected": 13.187112808227539, + "logps/chosen": -2672.224609375, + "logps/rejected": -2672.224609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4408874511719, + "rewards/margins": 0.0, + "rewards/rejected": -264.4408874511719, + "step": 685 + }, + { + "epoch": 7.221052631578948, + "grad_norm": 8.484906288686034e-07, + "learning_rate": 0.00018572631578947372, + "logits/chosen": 13.170920372009277, + "logits/rejected": 13.170920372009277, + "logps/chosen": -2672.4990234375, + "logps/rejected": -2672.4990234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.46832275390625, + "rewards/margins": 0.0, + "rewards/rejected": -264.46832275390625, + "step": 686 + }, + { + "epoch": 7.231578947368421, + "grad_norm": 1.3189201126806438e-06, + "learning_rate": 0.00018570526315789474, + "logits/chosen": 13.146041870117188, + "logits/rejected": 13.146041870117188, + "logps/chosen": -3543.5703125, + "logps/rejected": -3543.5703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3311462402344, + "rewards/margins": 0.0, + "rewards/rejected": -351.3311462402344, + "step": 687 + }, + { + "epoch": 7.242105263157895, + "grad_norm": 3.5408058920438634e-06, + "learning_rate": 0.0001856842105263158, + "logits/chosen": 13.157353401184082, + "logits/rejected": 13.157353401184082, + "logps/chosen": -4878.5478515625, + "logps/rejected": -4878.5478515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.00762939453125, + "rewards/margins": 0.0, + "rewards/rejected": -485.00762939453125, + "step": 688 + }, + { + "epoch": 7.252631578947368, + "grad_norm": 2.385500010859687e-06, + "learning_rate": 0.00018566315789473684, + "logits/chosen": 13.164440155029297, + "logits/rejected": 13.164440155029297, + "logps/chosen": -4325.8369140625, + "logps/rejected": -4325.8369140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.3988342285156, + "rewards/margins": 0.0, + "rewards/rejected": -429.3988342285156, + "step": 689 + }, + { + "epoch": 7.2631578947368425, + "grad_norm": 1.5604205145791639e-06, + "learning_rate": 0.0001856421052631579, + "logits/chosen": 13.159071922302246, + "logits/rejected": 13.159071922302246, + "logps/chosen": -3997.8828125, + "logps/rejected": -3997.8828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9609680175781, + "rewards/margins": 0.0, + "rewards/rejected": -396.9609680175781, + "step": 690 + }, + { + "epoch": 7.273684210526316, + "grad_norm": 1.8582973098091315e-06, + "learning_rate": 0.00018562105263157896, + "logits/chosen": 13.169404983520508, + "logits/rejected": 13.169404983520508, + "logps/chosen": -2674.0634765625, + "logps/rejected": -2674.0634765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.624755859375, + "rewards/margins": 0.0, + "rewards/rejected": -264.624755859375, + "step": 691 + }, + { + "epoch": 7.284210526315789, + "grad_norm": 2.6544091724645114e-06, + "learning_rate": 0.0001856, + "logits/chosen": 13.18026065826416, + "logits/rejected": 13.18026065826416, + "logps/chosen": -4326.390625, + "logps/rejected": -4326.390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4541931152344, + "rewards/margins": 0.0, + "rewards/rejected": -429.4541931152344, + "step": 692 + }, + { + "epoch": 7.294736842105263, + "grad_norm": 2.2888709736434976e-06, + "learning_rate": 0.00018557894736842106, + "logits/chosen": 13.155786514282227, + "logits/rejected": 13.155786514282227, + "logps/chosen": -4286.35693359375, + "logps/rejected": -4286.35693359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.8383483886719, + "rewards/margins": 0.0, + "rewards/rejected": -425.8383483886719, + "step": 693 + }, + { + "epoch": 7.3052631578947365, + "grad_norm": 1.6445006849608035e-06, + "learning_rate": 0.0001855578947368421, + "logits/chosen": 13.16234016418457, + "logits/rejected": 13.16234016418457, + "logps/chosen": -4878.3720703125, + "logps/rejected": -4878.3720703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.99005126953125, + "rewards/margins": 0.0, + "rewards/rejected": -484.99005126953125, + "step": 694 + }, + { + "epoch": 7.315789473684211, + "grad_norm": 9.28954591472575e-07, + "learning_rate": 0.00018553684210526316, + "logits/chosen": 13.133416175842285, + "logits/rejected": 13.133416175842285, + "logps/chosen": -3543.490234375, + "logps/rejected": -3543.490234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3231506347656, + "rewards/margins": 0.0, + "rewards/rejected": -351.3231506347656, + "step": 695 + }, + { + "epoch": 7.326315789473684, + "grad_norm": 3.3765720672818134e-06, + "learning_rate": 0.0001855157894736842, + "logits/chosen": 13.147964477539062, + "logits/rejected": 13.147964477539062, + "logps/chosen": -4878.134765625, + "logps/rejected": -4878.134765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.96630859375, + "rewards/margins": 0.0, + "rewards/rejected": -484.96630859375, + "step": 696 + }, + { + "epoch": 7.336842105263158, + "grad_norm": 3.7226850508886855e-06, + "learning_rate": 0.00018549473684210529, + "logits/chosen": 13.175617218017578, + "logits/rejected": 13.175617218017578, + "logps/chosen": -5173.36669921875, + "logps/rejected": -5173.36669921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3982543945312, + "rewards/margins": 0.0, + "rewards/rejected": -514.3982543945312, + "step": 697 + }, + { + "epoch": 7.347368421052631, + "grad_norm": 9.498692747911264e-07, + "learning_rate": 0.00018547368421052633, + "logits/chosen": 13.16496467590332, + "logits/rejected": 13.16496467590332, + "logps/chosen": -3758.5390625, + "logps/rejected": -3758.5390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.93560791015625, + "rewards/margins": 0.0, + "rewards/rejected": -372.93560791015625, + "step": 698 + }, + { + "epoch": 7.3578947368421055, + "grad_norm": 2.379321131229517e-06, + "learning_rate": 0.00018545263157894736, + "logits/chosen": 13.217950820922852, + "logits/rejected": 13.217950820922852, + "logps/chosen": -5173.88623046875, + "logps/rejected": -5173.88623046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4501953125, + "rewards/margins": 0.0, + "rewards/rejected": -514.4501953125, + "step": 699 + }, + { + "epoch": 7.368421052631579, + "grad_norm": 2.889323013732792e-06, + "learning_rate": 0.00018543157894736843, + "logits/chosen": 13.214141845703125, + "logits/rejected": 13.214141845703125, + "logps/chosen": -4878.8359375, + "logps/rejected": -4878.8359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.03643798828125, + "rewards/margins": 0.0, + "rewards/rejected": -485.03643798828125, + "step": 700 + }, + { + "epoch": 7.368421052631579, + "eval_logits/chosen": 13.222890853881836, + "eval_logits/rejected": 13.222890853881836, + "eval_logps/chosen": -4310.6044921875, + "eval_logps/rejected": -4310.6044921875, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.1573181152344, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.1573181152344, + "eval_runtime": 4.2544, + "eval_samples_per_second": 2.351, + "eval_steps_per_second": 2.351, + "step": 700 + }, + { + "epoch": 7.378947368421053, + "grad_norm": 1.1947388429689454e-06, + "learning_rate": 0.00018541052631578948, + "logits/chosen": 13.222030639648438, + "logits/rejected": 13.222030639648438, + "logps/chosen": -4327.9228515625, + "logps/rejected": -4327.9228515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.607421875, + "rewards/margins": 0.0, + "rewards/rejected": -429.607421875, + "step": 701 + }, + { + "epoch": 7.389473684210526, + "grad_norm": 1.419441900907259e-06, + "learning_rate": 0.00018538947368421053, + "logits/chosen": 13.211609840393066, + "logits/rejected": 13.211609840393066, + "logps/chosen": -4287.52099609375, + "logps/rejected": -4287.52099609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9547424316406, + "rewards/margins": 0.0, + "rewards/rejected": -425.9547424316406, + "step": 702 + }, + { + "epoch": 7.4, + "grad_norm": 1.3463594541462953e-06, + "learning_rate": 0.00018536842105263158, + "logits/chosen": 13.216324806213379, + "logits/rejected": 13.216324806213379, + "logps/chosen": -4287.73095703125, + "logps/rejected": -4287.73095703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9757385253906, + "rewards/margins": 0.0, + "rewards/rejected": -425.9757385253906, + "step": 703 + }, + { + "epoch": 7.410526315789474, + "grad_norm": 1.0049189995697816e-06, + "learning_rate": 0.00018534736842105266, + "logits/chosen": 13.217493057250977, + "logits/rejected": 13.217493057250977, + "logps/chosen": -3543.5703125, + "logps/rejected": -3543.5703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3311462402344, + "rewards/margins": 0.0, + "rewards/rejected": -351.3311462402344, + "step": 704 + }, + { + "epoch": 7.421052631578947, + "grad_norm": 1.2000433571301983e-06, + "learning_rate": 0.0001853263157894737, + "logits/chosen": 13.222097396850586, + "logits/rejected": 13.222097396850586, + "logps/chosen": -3996.91015625, + "logps/rejected": -3996.91015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.86370849609375, + "rewards/margins": 0.0, + "rewards/rejected": -396.86370849609375, + "step": 705 + }, + { + "epoch": 7.431578947368421, + "grad_norm": 1.2435954204192967e-06, + "learning_rate": 0.00018530526315789473, + "logits/chosen": 13.22079849243164, + "logits/rejected": 13.22079849243164, + "logps/chosen": -3996.921875, + "logps/rejected": -3996.921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8648681640625, + "rewards/margins": 0.0, + "rewards/rejected": -396.8648681640625, + "step": 706 + }, + { + "epoch": 7.442105263157894, + "grad_norm": 1.420723378942057e-06, + "learning_rate": 0.0001852842105263158, + "logits/chosen": 13.214343070983887, + "logits/rejected": 13.214343070983887, + "logps/chosen": -3777.92578125, + "logps/rejected": -3777.92578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9234619140625, + "rewards/margins": 0.0, + "rewards/rejected": -374.9234619140625, + "step": 707 + }, + { + "epoch": 7.4526315789473685, + "grad_norm": 1.2402593938531936e-06, + "learning_rate": 0.00018526315789473685, + "logits/chosen": 13.21203899383545, + "logits/rejected": 13.21203899383545, + "logps/chosen": -3997.42578125, + "logps/rejected": -3997.42578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9152526855469, + "rewards/margins": 0.0, + "rewards/rejected": -396.9152526855469, + "step": 708 + }, + { + "epoch": 7.463157894736842, + "grad_norm": 8.847569006320555e-07, + "learning_rate": 0.0001852421052631579, + "logits/chosen": 13.203995704650879, + "logits/rejected": 13.203995704650879, + "logps/chosen": -3543.4814453125, + "logps/rejected": -3543.4814453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.322265625, + "rewards/margins": 0.0, + "rewards/rejected": -351.322265625, + "step": 709 + }, + { + "epoch": 7.473684210526316, + "grad_norm": 1.2636772908081184e-06, + "learning_rate": 0.00018522105263157895, + "logits/chosen": 13.199202537536621, + "logits/rejected": 13.199202537536621, + "logps/chosen": -3778.107421875, + "logps/rejected": -3778.107421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9416198730469, + "rewards/margins": 0.0, + "rewards/rejected": -374.9416198730469, + "step": 710 + }, + { + "epoch": 7.484210526315789, + "grad_norm": 9.880624247671221e-07, + "learning_rate": 0.00018520000000000003, + "logits/chosen": 13.195527076721191, + "logits/rejected": 13.195527076721191, + "logps/chosen": -3543.294921875, + "logps/rejected": -3543.294921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3036193847656, + "rewards/margins": 0.0, + "rewards/rejected": -351.3036193847656, + "step": 711 + }, + { + "epoch": 7.494736842105263, + "grad_norm": 1.0237110927846516e-06, + "learning_rate": 0.00018517894736842105, + "logits/chosen": 13.193687438964844, + "logits/rejected": 13.193687438964844, + "logps/chosen": -3543.259765625, + "logps/rejected": -3543.259765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.30010986328125, + "rewards/margins": 0.0, + "rewards/rejected": -351.30010986328125, + "step": 712 + }, + { + "epoch": 7.505263157894737, + "grad_norm": 1.0467896345289773e-06, + "learning_rate": 0.0001851578947368421, + "logits/chosen": 13.198676109313965, + "logits/rejected": 13.198676109313965, + "logps/chosen": -3758.3916015625, + "logps/rejected": -3758.3916015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9208679199219, + "rewards/margins": 0.0, + "rewards/rejected": -372.9208679199219, + "step": 713 + }, + { + "epoch": 7.515789473684211, + "grad_norm": 1.1414570053602802e-06, + "learning_rate": 0.00018513684210526318, + "logits/chosen": 13.191129684448242, + "logits/rejected": 13.191129684448242, + "logps/chosen": -3998.025390625, + "logps/rejected": -3998.025390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9752197265625, + "rewards/margins": 0.0, + "rewards/rejected": -396.9752197265625, + "step": 714 + }, + { + "epoch": 7.526315789473684, + "grad_norm": 1.6184153537324164e-06, + "learning_rate": 0.00018511578947368423, + "logits/chosen": 13.223023414611816, + "logits/rejected": 13.223023414611816, + "logps/chosen": -5174.17236328125, + "logps/rejected": -5174.17236328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4788208007812, + "rewards/margins": 0.0, + "rewards/rejected": -514.4788208007812, + "step": 715 + }, + { + "epoch": 7.536842105263158, + "grad_norm": 1.2566912346301251e-06, + "learning_rate": 0.00018509473684210528, + "logits/chosen": 13.179170608520508, + "logits/rejected": 13.179170608520508, + "logps/chosen": -3778.5859375, + "logps/rejected": -3778.5859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9894714355469, + "rewards/margins": 0.0, + "rewards/rejected": -374.9894714355469, + "step": 716 + }, + { + "epoch": 7.5473684210526315, + "grad_norm": 8.977760330708406e-07, + "learning_rate": 0.00018507368421052632, + "logits/chosen": 13.176719665527344, + "logits/rejected": 13.176719665527344, + "logps/chosen": -3543.41796875, + "logps/rejected": -3543.41796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.31591796875, + "rewards/margins": 0.0, + "rewards/rejected": -351.31591796875, + "step": 717 + }, + { + "epoch": 7.557894736842105, + "grad_norm": 8.587193178755115e-07, + "learning_rate": 0.00018505263157894737, + "logits/chosen": 13.175223350524902, + "logits/rejected": 13.175223350524902, + "logps/chosen": -2967.9755859375, + "logps/rejected": -2967.9755859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0713806152344, + "rewards/margins": 0.0, + "rewards/rejected": -294.0713806152344, + "step": 718 + }, + { + "epoch": 7.568421052631579, + "grad_norm": 1.6446061863462091e-06, + "learning_rate": 0.00018503157894736842, + "logits/chosen": 13.213809967041016, + "logits/rejected": 13.213809967041016, + "logps/chosen": -5173.822265625, + "logps/rejected": -5173.822265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4437866210938, + "rewards/margins": 0.0, + "rewards/rejected": -514.4437866210938, + "step": 719 + }, + { + "epoch": 7.578947368421053, + "grad_norm": 1.615068413229892e-06, + "learning_rate": 0.00018501052631578947, + "logits/chosen": 13.215839385986328, + "logits/rejected": 13.215839385986328, + "logps/chosen": -5174.09814453125, + "logps/rejected": -5174.09814453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4713745117188, + "rewards/margins": 0.0, + "rewards/rejected": -514.4713745117188, + "step": 720 + }, + { + "epoch": 7.589473684210526, + "grad_norm": 8.918532898860576e-07, + "learning_rate": 0.00018498947368421052, + "logits/chosen": 13.180612564086914, + "logits/rejected": 13.180612564086914, + "logps/chosen": -3544.0849609375, + "logps/rejected": -3544.0849609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.38262939453125, + "rewards/margins": 0.0, + "rewards/rejected": -351.38262939453125, + "step": 721 + }, + { + "epoch": 7.6, + "grad_norm": 1.1378305089237983e-06, + "learning_rate": 0.0001849684210526316, + "logits/chosen": 13.187461853027344, + "logits/rejected": 13.187461853027344, + "logps/chosen": -3997.93359375, + "logps/rejected": -3997.93359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9660339355469, + "rewards/margins": 0.0, + "rewards/rejected": -396.9660339355469, + "step": 722 + }, + { + "epoch": 7.610526315789474, + "grad_norm": 1.1426026276240009e-06, + "learning_rate": 0.00018494736842105265, + "logits/chosen": 13.188652038574219, + "logits/rejected": 13.188652038574219, + "logps/chosen": -3997.9140625, + "logps/rejected": -3997.9140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9640808105469, + "rewards/margins": 0.0, + "rewards/rejected": -396.9640808105469, + "step": 723 + }, + { + "epoch": 7.621052631578947, + "grad_norm": 1.634643354009313e-06, + "learning_rate": 0.0001849263157894737, + "logits/chosen": 13.204824447631836, + "logits/rejected": 13.204824447631836, + "logps/chosen": -4877.31201171875, + "logps/rejected": -4877.31201171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.884033203125, + "rewards/margins": 0.0, + "rewards/rejected": -484.884033203125, + "step": 724 + }, + { + "epoch": 7.631578947368421, + "grad_norm": 1.768266542967467e-06, + "learning_rate": 0.00018490526315789475, + "logits/chosen": 13.202880859375, + "logits/rejected": 13.202880859375, + "logps/chosen": -4877.3203125, + "logps/rejected": -4877.3203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.8848571777344, + "rewards/margins": 0.0, + "rewards/rejected": -484.8848571777344, + "step": 725 + }, + { + "epoch": 7.6421052631578945, + "grad_norm": 1.7937302345671924e-06, + "learning_rate": 0.0001848842105263158, + "logits/chosen": 13.2013578414917, + "logits/rejected": 13.2013578414917, + "logps/chosen": -4877.49462890625, + "logps/rejected": -4877.49462890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9023132324219, + "rewards/margins": 0.0, + "rewards/rejected": -484.9023132324219, + "step": 726 + }, + { + "epoch": 7.652631578947369, + "grad_norm": 1.173831947198778e-06, + "learning_rate": 0.00018486315789473684, + "logits/chosen": 13.180315971374512, + "logits/rejected": 13.180315971374512, + "logps/chosen": -3998.373046875, + "logps/rejected": -3998.373046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0099792480469, + "rewards/margins": 0.0, + "rewards/rejected": -397.0099792480469, + "step": 727 + }, + { + "epoch": 7.663157894736842, + "grad_norm": 1.8086591353494441e-06, + "learning_rate": 0.0001848421052631579, + "logits/chosen": 13.21358871459961, + "logits/rejected": 13.21358871459961, + "logps/chosen": -5175.4736328125, + "logps/rejected": -5175.4736328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6089477539062, + "rewards/margins": 0.0, + "rewards/rejected": -514.6089477539062, + "step": 728 + }, + { + "epoch": 7.673684210526316, + "grad_norm": 1.3535036487155594e-06, + "learning_rate": 0.00018482105263157897, + "logits/chosen": 13.170319557189941, + "logits/rejected": 13.170319557189941, + "logps/chosen": -4288.05224609375, + "logps/rejected": -4288.05224609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.00787353515625, + "rewards/margins": 0.0, + "rewards/rejected": -426.00787353515625, + "step": 729 + }, + { + "epoch": 7.684210526315789, + "grad_norm": 1.0312728591088671e-06, + "learning_rate": 0.00018480000000000002, + "logits/chosen": 13.164056777954102, + "logits/rejected": 13.164056777954102, + "logps/chosen": -2966.96484375, + "logps/rejected": -2966.96484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9703063964844, + "rewards/margins": 0.0, + "rewards/rejected": -293.9703063964844, + "step": 730 + }, + { + "epoch": 7.6947368421052635, + "grad_norm": 1.3390608728514053e-06, + "learning_rate": 0.00018477894736842104, + "logits/chosen": 13.161995887756348, + "logits/rejected": 13.161995887756348, + "logps/chosen": -4287.9638671875, + "logps/rejected": -4287.9638671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9990234375, + "rewards/margins": 0.0, + "rewards/rejected": -425.9990234375, + "step": 731 + }, + { + "epoch": 7.705263157894737, + "grad_norm": 2.1017040126025677e-06, + "learning_rate": 0.00018475789473684212, + "logits/chosen": 13.177229881286621, + "logits/rejected": 13.177229881286621, + "logps/chosen": -4878.3525390625, + "logps/rejected": -4878.3525390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.98809814453125, + "rewards/margins": 0.0, + "rewards/rejected": -484.98809814453125, + "step": 732 + }, + { + "epoch": 7.715789473684211, + "grad_norm": 1.4877908824928454e-06, + "learning_rate": 0.00018473684210526317, + "logits/chosen": 13.151344299316406, + "logits/rejected": 13.151344299316406, + "logps/chosen": -3776.767578125, + "logps/rejected": -3776.767578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8076477050781, + "rewards/margins": 0.0, + "rewards/rejected": -374.8076477050781, + "step": 733 + }, + { + "epoch": 7.726315789473684, + "grad_norm": 1.8996194057763205e-06, + "learning_rate": 0.00018471578947368422, + "logits/chosen": 13.189788818359375, + "logits/rejected": 13.189788818359375, + "logps/chosen": -5175.0615234375, + "logps/rejected": -5175.0615234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5677490234375, + "rewards/margins": 0.0, + "rewards/rejected": -514.5677490234375, + "step": 734 + }, + { + "epoch": 7.7368421052631575, + "grad_norm": 1.2435864391591167e-06, + "learning_rate": 0.00018469473684210527, + "logits/chosen": 13.152837753295898, + "logits/rejected": 13.152837753295898, + "logps/chosen": -3756.8515625, + "logps/rejected": -3756.8515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.766845703125, + "rewards/margins": 0.0, + "rewards/rejected": -372.766845703125, + "step": 735 + }, + { + "epoch": 7.747368421052632, + "grad_norm": 1.2046779147567577e-06, + "learning_rate": 0.00018467368421052634, + "logits/chosen": 13.14518928527832, + "logits/rejected": 13.14518928527832, + "logps/chosen": -3998.337890625, + "logps/rejected": -3998.337890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0064697265625, + "rewards/margins": 0.0, + "rewards/rejected": -397.0064697265625, + "step": 736 + }, + { + "epoch": 7.757894736842105, + "grad_norm": 2.169391109418939e-06, + "learning_rate": 0.0001846526315789474, + "logits/chosen": 13.157787322998047, + "logits/rejected": 13.157787322998047, + "logps/chosen": -4878.3857421875, + "logps/rejected": -4878.3857421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9914245605469, + "rewards/margins": 0.0, + "rewards/rejected": -484.9914245605469, + "step": 737 + }, + { + "epoch": 7.768421052631579, + "grad_norm": 2.0689542452601017e-06, + "learning_rate": 0.00018463157894736841, + "logits/chosen": 13.153223991394043, + "logits/rejected": 13.153223991394043, + "logps/chosen": -4878.4873046875, + "logps/rejected": -4878.4873046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0015563964844, + "rewards/margins": 0.0, + "rewards/rejected": -485.0015563964844, + "step": 738 + }, + { + "epoch": 7.778947368421052, + "grad_norm": 1.987834821193246e-06, + "learning_rate": 0.0001846105263157895, + "logits/chosen": 13.151511192321777, + "logits/rejected": 13.151511192321777, + "logps/chosen": -4878.623046875, + "logps/rejected": -4878.623046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.01513671875, + "rewards/margins": 0.0, + "rewards/rejected": -485.01513671875, + "step": 739 + }, + { + "epoch": 7.7894736842105265, + "grad_norm": 1.003082729766902e-06, + "learning_rate": 0.00018458947368421054, + "logits/chosen": 13.128302574157715, + "logits/rejected": 13.128302574157715, + "logps/chosen": -2966.07421875, + "logps/rejected": -2966.07421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.8812255859375, + "rewards/margins": 0.0, + "rewards/rejected": -293.8812255859375, + "step": 740 + }, + { + "epoch": 7.8, + "grad_norm": 1.1290779866612866e-06, + "learning_rate": 0.0001845684210526316, + "logits/chosen": 13.134882926940918, + "logits/rejected": 13.134882926940918, + "logps/chosen": -2669.935546875, + "logps/rejected": -2669.935546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.21197509765625, + "rewards/margins": 0.0, + "rewards/rejected": -264.21197509765625, + "step": 741 + }, + { + "epoch": 7.810526315789474, + "grad_norm": 1.2003541769445292e-06, + "learning_rate": 0.00018454736842105264, + "logits/chosen": 13.126954078674316, + "logits/rejected": 13.126954078674316, + "logps/chosen": -3540.2578125, + "logps/rejected": -3540.2578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.9999084472656, + "rewards/margins": 0.0, + "rewards/rejected": -350.9999084472656, + "step": 742 + }, + { + "epoch": 7.821052631578947, + "grad_norm": 2.1146092876733746e-06, + "learning_rate": 0.00018452631578947371, + "logits/chosen": 13.170341491699219, + "logits/rejected": 13.170341491699219, + "logps/chosen": -5173.37109375, + "logps/rejected": -5173.37109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.398681640625, + "rewards/margins": 0.0, + "rewards/rejected": -514.398681640625, + "step": 743 + }, + { + "epoch": 7.831578947368421, + "grad_norm": 9.55191012508294e-07, + "learning_rate": 0.00018450526315789474, + "logits/chosen": 13.131455421447754, + "logits/rejected": 13.131455421447754, + "logps/chosen": -2965.537109375, + "logps/rejected": -2965.537109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.8275146484375, + "rewards/margins": 0.0, + "rewards/rejected": -293.8275146484375, + "step": 744 + }, + { + "epoch": 7.842105263157895, + "grad_norm": 1.2373399158605025e-06, + "learning_rate": 0.00018448421052631579, + "logits/chosen": 13.138838768005371, + "logits/rejected": 13.138838768005371, + "logps/chosen": -3997.7421875, + "logps/rejected": -3997.7421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9468994140625, + "rewards/margins": 0.0, + "rewards/rejected": -396.9468994140625, + "step": 745 + }, + { + "epoch": 7.852631578947369, + "grad_norm": 9.146277761828969e-07, + "learning_rate": 0.00018446315789473686, + "logits/chosen": 13.135359764099121, + "logits/rejected": 13.135359764099121, + "logps/chosen": -2965.5234375, + "logps/rejected": -2965.5234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.8261413574219, + "rewards/margins": 0.0, + "rewards/rejected": -293.8261413574219, + "step": 746 + }, + { + "epoch": 7.863157894736842, + "grad_norm": 9.955664381777751e-07, + "learning_rate": 0.0001844421052631579, + "logits/chosen": 13.141283988952637, + "logits/rejected": 13.141283988952637, + "logps/chosen": -2669.865234375, + "logps/rejected": -2669.865234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.2049255371094, + "rewards/margins": 0.0, + "rewards/rejected": -264.2049255371094, + "step": 747 + }, + { + "epoch": 7.873684210526315, + "grad_norm": 1.2750660971505567e-06, + "learning_rate": 0.00018442105263157896, + "logits/chosen": 13.137335777282715, + "logits/rejected": 13.137335777282715, + "logps/chosen": -3997.3125, + "logps/rejected": -3997.3125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9039306640625, + "rewards/margins": 0.0, + "rewards/rejected": -396.9039306640625, + "step": 748 + }, + { + "epoch": 7.88421052631579, + "grad_norm": 2.5515464585623704e-06, + "learning_rate": 0.0001844, + "logits/chosen": 13.152668952941895, + "logits/rejected": 13.152668952941895, + "logps/chosen": -4321.96875, + "logps/rejected": -4321.96875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.01202392578125, + "rewards/margins": 0.0, + "rewards/rejected": -429.01202392578125, + "step": 749 + }, + { + "epoch": 7.894736842105263, + "grad_norm": 1.0633815463734209e-06, + "learning_rate": 0.00018437894736842106, + "logits/chosen": 13.141569137573242, + "logits/rejected": 13.141569137573242, + "logps/chosen": -2670.77734375, + "logps/rejected": -2670.77734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.296142578125, + "rewards/margins": 0.0, + "rewards/rejected": -264.296142578125, + "step": 750 + }, + { + "epoch": 7.894736842105263, + "eval_logits/chosen": 13.161173820495605, + "eval_logits/rejected": 13.161173820495605, + "eval_logps/chosen": -4308.30615234375, + "eval_logps/rejected": -4308.30615234375, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -427.92742919921875, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -427.92742919921875, + "eval_runtime": 4.3335, + "eval_samples_per_second": 2.308, + "eval_steps_per_second": 2.308, + "step": 750 + }, + { + "epoch": 7.905263157894737, + "grad_norm": 1.4386038174052374e-06, + "learning_rate": 0.0001843578947368421, + "logits/chosen": 13.139097213745117, + "logits/rejected": 13.139097213745117, + "logps/chosen": -3539.87890625, + "logps/rejected": -3539.87890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.9620056152344, + "rewards/margins": 0.0, + "rewards/rejected": -350.9620056152344, + "step": 751 + }, + { + "epoch": 7.91578947368421, + "grad_norm": 1.4269968460212112e-06, + "learning_rate": 0.00018433684210526316, + "logits/chosen": 13.14493465423584, + "logits/rejected": 13.14493465423584, + "logps/chosen": -4287.3388671875, + "logps/rejected": -4287.3388671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9365234375, + "rewards/margins": 0.0, + "rewards/rejected": -425.9365234375, + "step": 752 + }, + { + "epoch": 7.926315789473684, + "grad_norm": 1.2781040368281538e-06, + "learning_rate": 0.0001843157894736842, + "logits/chosen": 13.150657653808594, + "logits/rejected": 13.150657653808594, + "logps/chosen": -2671.9609375, + "logps/rejected": -2671.9609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4145202636719, + "rewards/margins": 0.0, + "rewards/rejected": -264.4145202636719, + "step": 753 + }, + { + "epoch": 7.936842105263158, + "grad_norm": 1.382065192956361e-06, + "learning_rate": 0.00018429473684210528, + "logits/chosen": 13.151413917541504, + "logits/rejected": 13.151413917541504, + "logps/chosen": -3757.353515625, + "logps/rejected": -3757.353515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8170471191406, + "rewards/margins": 0.0, + "rewards/rejected": -372.8170471191406, + "step": 754 + }, + { + "epoch": 7.947368421052632, + "grad_norm": 2.0304607915022643e-06, + "learning_rate": 0.00018427368421052633, + "logits/chosen": 13.185158729553223, + "logits/rejected": 13.185158729553223, + "logps/chosen": -5173.490234375, + "logps/rejected": -5173.490234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4105834960938, + "rewards/margins": 0.0, + "rewards/rejected": -514.4105834960938, + "step": 755 + }, + { + "epoch": 7.957894736842105, + "grad_norm": 9.280582844439778e-07, + "learning_rate": 0.00018425263157894738, + "logits/chosen": 13.14723014831543, + "logits/rejected": 13.14723014831543, + "logps/chosen": -2673.048828125, + "logps/rejected": -2673.048828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5232849121094, + "rewards/margins": 0.0, + "rewards/rejected": -264.5232849121094, + "step": 756 + }, + { + "epoch": 7.968421052631579, + "grad_norm": 1.270628899874282e-06, + "learning_rate": 0.00018423157894736843, + "logits/chosen": 13.143248558044434, + "logits/rejected": 13.143248558044434, + "logps/chosen": -3998.091796875, + "logps/rejected": -3998.091796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.98187255859375, + "rewards/margins": 0.0, + "rewards/rejected": -396.98187255859375, + "step": 757 + }, + { + "epoch": 7.978947368421053, + "grad_norm": 8.530884088031598e-07, + "learning_rate": 0.00018421052631578948, + "logits/chosen": 13.136255264282227, + "logits/rejected": 13.136255264282227, + "logps/chosen": -2967.951171875, + "logps/rejected": -2967.951171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0689392089844, + "rewards/margins": 0.0, + "rewards/rejected": -294.0689392089844, + "step": 758 + }, + { + "epoch": 7.989473684210527, + "grad_norm": 1.972178324649576e-06, + "learning_rate": 0.00018418947368421053, + "logits/chosen": 13.158719062805176, + "logits/rejected": 13.158719062805176, + "logps/chosen": -4877.99609375, + "logps/rejected": -4877.99609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.95245361328125, + "rewards/margins": 0.0, + "rewards/rejected": -484.95245361328125, + "step": 759 + }, + { + "epoch": 8.0, + "grad_norm": 1.0304795523552457e-06, + "learning_rate": 0.00018416842105263158, + "logits/chosen": 13.136702537536621, + "logits/rejected": 13.136702537536621, + "logps/chosen": -3541.220703125, + "logps/rejected": -3541.220703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.09619140625, + "rewards/margins": 0.0, + "rewards/rejected": -351.09619140625, + "step": 760 + }, + { + "epoch": 8.010526315789473, + "grad_norm": 9.369648523716023e-07, + "learning_rate": 0.00018414736842105266, + "logits/chosen": 13.140413284301758, + "logits/rejected": 13.140413284301758, + "logps/chosen": -2968.234375, + "logps/rejected": -2968.234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0972595214844, + "rewards/margins": 0.0, + "rewards/rejected": -294.0972595214844, + "step": 761 + }, + { + "epoch": 8.021052631578947, + "grad_norm": 2.2311128304863814e-06, + "learning_rate": 0.0001841263157894737, + "logits/chosen": 13.189372062683105, + "logits/rejected": 13.189372062683105, + "logps/chosen": -5173.771484375, + "logps/rejected": -5173.771484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.438720703125, + "rewards/margins": 0.0, + "rewards/rejected": -514.438720703125, + "step": 762 + }, + { + "epoch": 8.031578947368422, + "grad_norm": 2.4661135284986813e-06, + "learning_rate": 0.00018410526315789473, + "logits/chosen": 13.174569129943848, + "logits/rejected": 13.174569129943848, + "logps/chosen": -4877.75439453125, + "logps/rejected": -4877.75439453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.92828369140625, + "rewards/margins": 0.0, + "rewards/rejected": -484.92828369140625, + "step": 763 + }, + { + "epoch": 8.042105263157895, + "grad_norm": 1.6518553138666903e-06, + "learning_rate": 0.0001840842105263158, + "logits/chosen": 13.149615287780762, + "logits/rejected": 13.149615287780762, + "logps/chosen": -3774.9501953125, + "logps/rejected": -3774.9501953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.62591552734375, + "rewards/margins": 0.0, + "rewards/rejected": -374.62591552734375, + "step": 764 + }, + { + "epoch": 8.052631578947368, + "grad_norm": 1.2084163927283953e-06, + "learning_rate": 0.00018406315789473685, + "logits/chosen": 13.151865005493164, + "logits/rejected": 13.151865005493164, + "logps/chosen": -3541.8154296875, + "logps/rejected": -3541.8154296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1556701660156, + "rewards/margins": 0.0, + "rewards/rejected": -351.1556701660156, + "step": 765 + }, + { + "epoch": 8.063157894736841, + "grad_norm": 1.1007351758962614e-06, + "learning_rate": 0.0001840421052631579, + "logits/chosen": 13.15410041809082, + "logits/rejected": 13.15410041809082, + "logps/chosen": -3542.0419921875, + "logps/rejected": -3542.0419921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1783142089844, + "rewards/margins": 0.0, + "rewards/rejected": -351.1783142089844, + "step": 766 + }, + { + "epoch": 8.073684210526316, + "grad_norm": 2.2168753730511526e-06, + "learning_rate": 0.00018402105263157895, + "logits/chosen": 13.183789253234863, + "logits/rejected": 13.183789253234863, + "logps/chosen": -4878.0517578125, + "logps/rejected": -4878.0517578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9580078125, + "rewards/margins": 0.0, + "rewards/rejected": -484.9580078125, + "step": 767 + }, + { + "epoch": 8.08421052631579, + "grad_norm": 1.6371959645766765e-06, + "learning_rate": 0.00018400000000000003, + "logits/chosen": 13.161255836486816, + "logits/rejected": 13.161255836486816, + "logps/chosen": -3775.40234375, + "logps/rejected": -3775.40234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.6711120605469, + "rewards/margins": 0.0, + "rewards/rejected": -374.6711120605469, + "step": 768 + }, + { + "epoch": 8.094736842105263, + "grad_norm": 1.6546122196814395e-06, + "learning_rate": 0.00018397894736842105, + "logits/chosen": 13.165945053100586, + "logits/rejected": 13.165945053100586, + "logps/chosen": -3775.83984375, + "logps/rejected": -3775.83984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7148742675781, + "rewards/margins": 0.0, + "rewards/rejected": -374.7148742675781, + "step": 769 + }, + { + "epoch": 8.105263157894736, + "grad_norm": 1.4430454484681832e-06, + "learning_rate": 0.0001839578947368421, + "logits/chosen": 13.174822807312012, + "logits/rejected": 13.174822807312012, + "logps/chosen": -3996.798828125, + "logps/rejected": -3996.798828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8525695800781, + "rewards/margins": 0.0, + "rewards/rejected": -396.8525695800781, + "step": 770 + }, + { + "epoch": 8.115789473684211, + "grad_norm": 9.826237601373577e-07, + "learning_rate": 0.00018393684210526318, + "logits/chosen": 13.167154312133789, + "logits/rejected": 13.167154312133789, + "logps/chosen": -2967.8173828125, + "logps/rejected": -2967.8173828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0555419921875, + "rewards/margins": 0.0, + "rewards/rejected": -294.0555419921875, + "step": 771 + }, + { + "epoch": 8.126315789473685, + "grad_norm": 1.1678483815558138e-06, + "learning_rate": 0.00018391578947368422, + "logits/chosen": 13.165459632873535, + "logits/rejected": 13.165459632873535, + "logps/chosen": -3542.490234375, + "logps/rejected": -3542.490234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.22314453125, + "rewards/margins": 0.0, + "rewards/rejected": -351.22314453125, + "step": 772 + }, + { + "epoch": 8.136842105263158, + "grad_norm": 2.1347743768274086e-06, + "learning_rate": 0.00018389473684210527, + "logits/chosen": 13.209723472595215, + "logits/rejected": 13.209723472595215, + "logps/chosen": -5172.513671875, + "logps/rejected": -5172.513671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3129272460938, + "rewards/margins": 0.0, + "rewards/rejected": -514.3129272460938, + "step": 773 + }, + { + "epoch": 8.147368421052631, + "grad_norm": 1.1475698329377337e-06, + "learning_rate": 0.00018387368421052632, + "logits/chosen": 13.165400505065918, + "logits/rejected": 13.165400505065918, + "logps/chosen": -3542.751953125, + "logps/rejected": -3542.751953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.24932861328125, + "rewards/margins": 0.0, + "rewards/rejected": -351.24932861328125, + "step": 774 + }, + { + "epoch": 8.157894736842104, + "grad_norm": 2.191958628827706e-06, + "learning_rate": 0.0001838526315789474, + "logits/chosen": 13.214207649230957, + "logits/rejected": 13.214207649230957, + "logps/chosen": -5172.4833984375, + "logps/rejected": -5172.4833984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3099365234375, + "rewards/margins": 0.0, + "rewards/rejected": -514.3099365234375, + "step": 775 + }, + { + "epoch": 8.16842105263158, + "grad_norm": 1.6320094573529786e-06, + "learning_rate": 0.00018383157894736842, + "logits/chosen": 13.17676830291748, + "logits/rejected": 13.17676830291748, + "logps/chosen": -3995.75, + "logps/rejected": -3995.75, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7476806640625, + "rewards/margins": 0.0, + "rewards/rejected": -396.7476806640625, + "step": 776 + }, + { + "epoch": 8.178947368421053, + "grad_norm": 1.0968159358526464e-06, + "learning_rate": 0.00018381052631578947, + "logits/chosen": 13.174088478088379, + "logits/rejected": 13.174088478088379, + "logps/chosen": -2672.7353515625, + "logps/rejected": -2672.7353515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.491943359375, + "rewards/margins": 0.0, + "rewards/rejected": -264.491943359375, + "step": 777 + }, + { + "epoch": 8.189473684210526, + "grad_norm": 1.1563734005903825e-06, + "learning_rate": 0.00018378947368421055, + "logits/chosen": 13.166203498840332, + "logits/rejected": 13.166203498840332, + "logps/chosen": -3542.9287109375, + "logps/rejected": -3542.9287109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2669982910156, + "rewards/margins": 0.0, + "rewards/rejected": -351.2669982910156, + "step": 778 + }, + { + "epoch": 8.2, + "grad_norm": 2.483112439222168e-06, + "learning_rate": 0.0001837684210526316, + "logits/chosen": 13.211294174194336, + "logits/rejected": 13.211294174194336, + "logps/chosen": -5172.6845703125, + "logps/rejected": -5172.6845703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3300170898438, + "rewards/margins": 0.0, + "rewards/rejected": -514.3300170898438, + "step": 779 + }, + { + "epoch": 8.210526315789474, + "grad_norm": 1.3655018165081856e-06, + "learning_rate": 0.00018374736842105265, + "logits/chosen": 13.173303604125977, + "logits/rejected": 13.173303604125977, + "logps/chosen": -3756.1572265625, + "logps/rejected": -3756.1572265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.6974182128906, + "rewards/margins": 0.0, + "rewards/rejected": -372.6974182128906, + "step": 780 + }, + { + "epoch": 8.221052631578948, + "grad_norm": 1.114269934987533e-06, + "learning_rate": 0.0001837263157894737, + "logits/chosen": 13.165274620056152, + "logits/rejected": 13.165274620056152, + "logps/chosen": -2966.501953125, + "logps/rejected": -2966.501953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.92401123046875, + "rewards/margins": 0.0, + "rewards/rejected": -293.92401123046875, + "step": 781 + }, + { + "epoch": 8.23157894736842, + "grad_norm": 1.7835608332461561e-06, + "learning_rate": 0.00018370526315789474, + "logits/chosen": 13.170153617858887, + "logits/rejected": 13.170153617858887, + "logps/chosen": -3994.923828125, + "logps/rejected": -3994.923828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.6650695800781, + "rewards/margins": 0.0, + "rewards/rejected": -396.6650695800781, + "step": 782 + }, + { + "epoch": 8.242105263157894, + "grad_norm": 3.086871402047109e-06, + "learning_rate": 0.0001836842105263158, + "logits/chosen": 13.18794059753418, + "logits/rejected": 13.18794059753418, + "logps/chosen": -4874.416015625, + "logps/rejected": -4874.416015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.5944519042969, + "rewards/margins": 0.0, + "rewards/rejected": -484.5944519042969, + "step": 783 + }, + { + "epoch": 8.25263157894737, + "grad_norm": 1.2969069302926073e-06, + "learning_rate": 0.00018366315789473684, + "logits/chosen": 13.152263641357422, + "logits/rejected": 13.152263641357422, + "logps/chosen": -3543.099609375, + "logps/rejected": -3543.099609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2840881347656, + "rewards/margins": 0.0, + "rewards/rejected": -351.2840881347656, + "step": 784 + }, + { + "epoch": 8.263157894736842, + "grad_norm": 1.7018771814036882e-06, + "learning_rate": 0.0001836421052631579, + "logits/chosen": 13.152185440063477, + "logits/rejected": 13.152185440063477, + "logps/chosen": -3995.17578125, + "logps/rejected": -3995.17578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.6902770996094, + "rewards/margins": 0.0, + "rewards/rejected": -396.6902770996094, + "step": 785 + }, + { + "epoch": 8.273684210526316, + "grad_norm": 1.430383690603776e-06, + "learning_rate": 0.00018362105263157897, + "logits/chosen": 13.146105766296387, + "logits/rejected": 13.146105766296387, + "logps/chosen": -3755.7626953125, + "logps/rejected": -3755.7626953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.657958984375, + "rewards/margins": 0.0, + "rewards/rejected": -372.657958984375, + "step": 786 + }, + { + "epoch": 8.284210526315789, + "grad_norm": 3.482311967673013e-06, + "learning_rate": 0.00018360000000000002, + "logits/chosen": 13.158924102783203, + "logits/rejected": 13.158924102783203, + "logps/chosen": -4873.8955078125, + "logps/rejected": -4873.8955078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.5423889160156, + "rewards/margins": 0.0, + "rewards/rejected": -484.5423889160156, + "step": 787 + }, + { + "epoch": 8.294736842105262, + "grad_norm": 1.1829019967990462e-06, + "learning_rate": 0.00018357894736842104, + "logits/chosen": 13.123421669006348, + "logits/rejected": 13.123421669006348, + "logps/chosen": -2671.015625, + "logps/rejected": -2671.015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.3199768066406, + "rewards/margins": 0.0, + "rewards/rejected": -264.3199768066406, + "step": 788 + }, + { + "epoch": 8.305263157894737, + "grad_norm": 3.6452552194532473e-06, + "learning_rate": 0.00018355789473684212, + "logits/chosen": 13.161881446838379, + "logits/rejected": 13.161881446838379, + "logps/chosen": -5171.544921875, + "logps/rejected": -5171.544921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.216064453125, + "rewards/margins": 0.0, + "rewards/rejected": -514.216064453125, + "step": 789 + }, + { + "epoch": 8.31578947368421, + "grad_norm": 4.052975327795139e-06, + "learning_rate": 0.00018353684210526317, + "logits/chosen": 13.157443046569824, + "logits/rejected": 13.157443046569824, + "logps/chosen": -5171.37451171875, + "logps/rejected": -5171.37451171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.1990356445312, + "rewards/margins": 0.0, + "rewards/rejected": -514.1990356445312, + "step": 790 + }, + { + "epoch": 8.326315789473684, + "grad_norm": 4.249576250003884e-06, + "learning_rate": 0.00018351578947368421, + "logits/chosen": 13.154475212097168, + "logits/rejected": 13.154475212097168, + "logps/chosen": -5171.173828125, + "logps/rejected": -5171.173828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.178955078125, + "rewards/margins": 0.0, + "rewards/rejected": -514.178955078125, + "step": 791 + }, + { + "epoch": 8.336842105263157, + "grad_norm": 2.7736061838368187e-06, + "learning_rate": 0.00018349473684210526, + "logits/chosen": 13.104389190673828, + "logits/rejected": 13.104389190673828, + "logps/chosen": -3994.251953125, + "logps/rejected": -3994.251953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.5978698730469, + "rewards/margins": 0.0, + "rewards/rejected": -396.5978698730469, + "step": 792 + }, + { + "epoch": 8.347368421052632, + "grad_norm": 4.41437759945984e-06, + "learning_rate": 0.00018347368421052634, + "logits/chosen": 13.141545295715332, + "logits/rejected": 13.141545295715332, + "logps/chosen": -5171.1044921875, + "logps/rejected": -5171.1044921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.1720581054688, + "rewards/margins": 0.0, + "rewards/rejected": -514.1720581054688, + "step": 793 + }, + { + "epoch": 8.357894736842105, + "grad_norm": 2.124402953995741e-06, + "learning_rate": 0.0001834526315789474, + "logits/chosen": 13.08736515045166, + "logits/rejected": 13.08736515045166, + "logps/chosen": -3993.392578125, + "logps/rejected": -3993.392578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.5119323730469, + "rewards/margins": 0.0, + "rewards/rejected": -396.5119323730469, + "step": 794 + }, + { + "epoch": 8.368421052631579, + "grad_norm": 2.1847272364539094e-06, + "learning_rate": 0.0001834315789473684, + "logits/chosen": 13.079763412475586, + "logits/rejected": 13.079763412475586, + "logps/chosen": -3753.921875, + "logps/rejected": -3753.921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.473876953125, + "rewards/margins": 0.0, + "rewards/rejected": -372.473876953125, + "step": 795 + }, + { + "epoch": 8.378947368421052, + "grad_norm": 3.434222207943094e-06, + "learning_rate": 0.0001834105263157895, + "logits/chosen": 13.064455032348633, + "logits/rejected": 13.064455032348633, + "logps/chosen": -4279.939453125, + "logps/rejected": -4279.939453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.19659423828125, + "rewards/margins": 0.0, + "rewards/rejected": -425.19659423828125, + "step": 796 + }, + { + "epoch": 8.389473684210527, + "grad_norm": 2.2049848666938487e-06, + "learning_rate": 0.00018338947368421054, + "logits/chosen": 13.054069519042969, + "logits/rejected": 13.054069519042969, + "logps/chosen": -3538.095703125, + "logps/rejected": -3538.095703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.78369140625, + "rewards/margins": 0.0, + "rewards/rejected": -350.78369140625, + "step": 797 + }, + { + "epoch": 8.4, + "grad_norm": 1.7060622212738963e-06, + "learning_rate": 0.0001833684210526316, + "logits/chosen": 13.050280570983887, + "logits/rejected": 13.050280570983887, + "logps/chosen": -2667.341796875, + "logps/rejected": -2667.341796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -263.9526062011719, + "rewards/margins": 0.0, + "rewards/rejected": -263.9526062011719, + "step": 798 + }, + { + "epoch": 8.410526315789474, + "grad_norm": 5.362900992622599e-06, + "learning_rate": 0.00018334736842105264, + "logits/chosen": 13.101919174194336, + "logits/rejected": 13.101919174194336, + "logps/chosen": -5169.5625, + "logps/rejected": -5169.5625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.017822265625, + "rewards/margins": 0.0, + "rewards/rejected": -514.017822265625, + "step": 799 + }, + { + "epoch": 8.421052631578947, + "grad_norm": 3.467218675723416e-06, + "learning_rate": 0.0001833263157894737, + "logits/chosen": 13.050479888916016, + "logits/rejected": 13.050479888916016, + "logps/chosen": -3991.6640625, + "logps/rejected": -3991.6640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.3390808105469, + "rewards/margins": 0.0, + "rewards/rejected": -396.3390808105469, + "step": 800 + }, + { + "epoch": 8.421052631578947, + "eval_logits/chosen": 13.067541122436523, + "eval_logits/rejected": 13.067541122436523, + "eval_logps/chosen": -4302.88427734375, + "eval_logps/rejected": -4302.88427734375, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -427.38519287109375, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -427.38519287109375, + "eval_runtime": 4.1659, + "eval_samples_per_second": 2.4, + "eval_steps_per_second": 2.4, + "step": 800 + }, + { + "epoch": 8.431578947368422, + "grad_norm": 1.5714698520241654e-06, + "learning_rate": 0.00018330526315789473, + "logits/chosen": 13.039494514465332, + "logits/rejected": 13.039494514465332, + "logps/chosen": -2667.16015625, + "logps/rejected": -2667.16015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -263.9344177246094, + "rewards/margins": 0.0, + "rewards/rejected": -263.9344177246094, + "step": 801 + }, + { + "epoch": 8.442105263157895, + "grad_norm": 3.1759427656652406e-06, + "learning_rate": 0.00018328421052631578, + "logits/chosen": 13.030718803405762, + "logits/rejected": 13.030718803405762, + "logps/chosen": -3767.5625, + "logps/rejected": -3767.5625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -373.88714599609375, + "rewards/margins": 0.0, + "rewards/rejected": -373.88714599609375, + "step": 802 + }, + { + "epoch": 8.452631578947368, + "grad_norm": 1.6672837546138908e-06, + "learning_rate": 0.00018326315789473686, + "logits/chosen": 13.02714729309082, + "logits/rejected": 13.02714729309082, + "logps/chosen": -2960.42578125, + "logps/rejected": -2960.42578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.3163757324219, + "rewards/margins": 0.0, + "rewards/rejected": -293.3163757324219, + "step": 803 + }, + { + "epoch": 8.463157894736842, + "grad_norm": 2.9747789085377008e-06, + "learning_rate": 0.0001832421052631579, + "logits/chosen": 13.038200378417969, + "logits/rejected": 13.038200378417969, + "logps/chosen": -3751.80078125, + "logps/rejected": -3751.80078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.26177978515625, + "rewards/margins": 0.0, + "rewards/rejected": -372.26177978515625, + "step": 804 + }, + { + "epoch": 8.473684210526315, + "grad_norm": 3.270132538091275e-06, + "learning_rate": 0.00018322105263157896, + "logits/chosen": 13.025984764099121, + "logits/rejected": 13.025984764099121, + "logps/chosen": -3767.630859375, + "logps/rejected": -3767.630859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -373.89398193359375, + "rewards/margins": 0.0, + "rewards/rejected": -373.89398193359375, + "step": 805 + }, + { + "epoch": 8.48421052631579, + "grad_norm": 3.1023328119772486e-06, + "learning_rate": 0.0001832, + "logits/chosen": 13.033570289611816, + "logits/rejected": 13.033570289611816, + "logps/chosen": -3991.08203125, + "logps/rejected": -3991.08203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.2808837890625, + "rewards/margins": 0.0, + "rewards/rejected": -396.2808837890625, + "step": 806 + }, + { + "epoch": 8.494736842105263, + "grad_norm": 2.9121283660060726e-06, + "learning_rate": 0.00018317894736842108, + "logits/chosen": 13.022355079650879, + "logits/rejected": 13.022355079650879, + "logps/chosen": -3767.962890625, + "logps/rejected": -3767.962890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -373.92718505859375, + "rewards/margins": 0.0, + "rewards/rejected": -373.92718505859375, + "step": 807 + }, + { + "epoch": 8.505263157894737, + "grad_norm": 5.836352556798374e-06, + "learning_rate": 0.0001831578947368421, + "logits/chosen": 13.06139087677002, + "logits/rejected": 13.06139087677002, + "logps/chosen": -4867.3837890625, + "logps/rejected": -4867.3837890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -483.8912048339844, + "rewards/margins": 0.0, + "rewards/rejected": -483.8912048339844, + "step": 808 + }, + { + "epoch": 8.51578947368421, + "grad_norm": 2.658028734003892e-06, + "learning_rate": 0.00018313684210526316, + "logits/chosen": 13.023484230041504, + "logits/rejected": 13.023484230041504, + "logps/chosen": -3535.515625, + "logps/rejected": -3535.515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.52569580078125, + "rewards/margins": 0.0, + "rewards/rejected": -350.52569580078125, + "step": 809 + }, + { + "epoch": 8.526315789473685, + "grad_norm": 5.972752205707366e-06, + "learning_rate": 0.0001831157894736842, + "logits/chosen": 13.087800979614258, + "logits/rejected": 13.087800979614258, + "logps/chosen": -5165.70703125, + "logps/rejected": -5165.70703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -513.6322631835938, + "rewards/margins": 0.0, + "rewards/rejected": -513.6322631835938, + "step": 810 + }, + { + "epoch": 8.536842105263158, + "grad_norm": 2.155912397938664e-06, + "learning_rate": 0.00018309473684210528, + "logits/chosen": 13.027406692504883, + "logits/rejected": 13.027406692504883, + "logps/chosen": -2959.890625, + "logps/rejected": -2959.890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.26287841796875, + "rewards/margins": 0.0, + "rewards/rejected": -293.26287841796875, + "step": 811 + }, + { + "epoch": 8.547368421052632, + "grad_norm": 2.102140115312068e-06, + "learning_rate": 0.00018307368421052633, + "logits/chosen": 13.044645309448242, + "logits/rejected": 13.044645309448242, + "logps/chosen": -3535.67578125, + "logps/rejected": -3535.67578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.5417175292969, + "rewards/margins": 0.0, + "rewards/rejected": -350.5417175292969, + "step": 812 + }, + { + "epoch": 8.557894736842105, + "grad_norm": 2.10280563806009e-06, + "learning_rate": 0.00018305263157894738, + "logits/chosen": 13.0581636428833, + "logits/rejected": 13.0581636428833, + "logps/chosen": -3535.451171875, + "logps/rejected": -3535.451171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.5192565917969, + "rewards/margins": 0.0, + "rewards/rejected": -350.5192565917969, + "step": 813 + }, + { + "epoch": 8.568421052631578, + "grad_norm": 2.7769708594860276e-06, + "learning_rate": 0.00018303157894736843, + "logits/chosen": 13.076176643371582, + "logits/rejected": 13.076176643371582, + "logps/chosen": -3990.396484375, + "logps/rejected": -3990.396484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.21234130859375, + "rewards/margins": 0.0, + "rewards/rejected": -396.21234130859375, + "step": 814 + }, + { + "epoch": 8.578947368421053, + "grad_norm": 4.9173436309501994e-06, + "learning_rate": 0.00018301052631578948, + "logits/chosen": 13.120112419128418, + "logits/rejected": 13.120112419128418, + "logps/chosen": -4867.4033203125, + "logps/rejected": -4867.4033203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -483.8931579589844, + "rewards/margins": 0.0, + "rewards/rejected": -483.8931579589844, + "step": 815 + }, + { + "epoch": 8.589473684210526, + "grad_norm": 1.79261712673906e-06, + "learning_rate": 0.00018298947368421053, + "logits/chosen": 13.083232879638672, + "logits/rejected": 13.083232879638672, + "logps/chosen": -2667.4736328125, + "logps/rejected": -2667.4736328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -263.9657897949219, + "rewards/margins": 0.0, + "rewards/rejected": -263.9657897949219, + "step": 816 + }, + { + "epoch": 8.6, + "grad_norm": 4.694145900430158e-06, + "learning_rate": 0.00018296842105263158, + "logits/chosen": 13.135228157043457, + "logits/rejected": 13.135228157043457, + "logps/chosen": -4867.4580078125, + "logps/rejected": -4867.4580078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -483.8986511230469, + "rewards/margins": 0.0, + "rewards/rejected": -483.8986511230469, + "step": 817 + }, + { + "epoch": 8.610526315789473, + "grad_norm": 3.729123363882536e-06, + "learning_rate": 0.00018294736842105265, + "logits/chosen": 13.127240180969238, + "logits/rejected": 13.127240180969238, + "logps/chosen": -4316.6748046875, + "logps/rejected": -4316.6748046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -428.4826354980469, + "rewards/margins": 0.0, + "rewards/rejected": -428.4826354980469, + "step": 818 + }, + { + "epoch": 8.621052631578948, + "grad_norm": 5.973961378913373e-06, + "learning_rate": 0.0001829263157894737, + "logits/chosen": 13.143715858459473, + "logits/rejected": 13.143715858459473, + "logps/chosen": -4867.77734375, + "logps/rejected": -4867.77734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -483.9305725097656, + "rewards/margins": 0.0, + "rewards/rejected": -483.9305725097656, + "step": 819 + }, + { + "epoch": 8.631578947368421, + "grad_norm": 4.26080896431813e-06, + "learning_rate": 0.00018290526315789472, + "logits/chosen": 13.102783203125, + "logits/rejected": 13.102783203125, + "logps/chosen": -3990.439453125, + "logps/rejected": -3990.439453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.2166442871094, + "rewards/margins": 0.0, + "rewards/rejected": -396.2166442871094, + "step": 820 + }, + { + "epoch": 8.642105263157895, + "grad_norm": 4.782623818755383e-06, + "learning_rate": 0.0001828842105263158, + "logits/chosen": 13.121586799621582, + "logits/rejected": 13.121586799621582, + "logps/chosen": -4316.8583984375, + "logps/rejected": -4316.8583984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -428.5009765625, + "rewards/margins": 0.0, + "rewards/rejected": -428.5009765625, + "step": 821 + }, + { + "epoch": 8.652631578947368, + "grad_norm": 4.768724465975538e-06, + "learning_rate": 0.00018286315789473685, + "logits/chosen": 13.10221004486084, + "logits/rejected": 13.10221004486084, + "logps/chosen": -4278.83251953125, + "logps/rejected": -4278.83251953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.0859069824219, + "rewards/margins": 0.0, + "rewards/rejected": -425.0859069824219, + "step": 822 + }, + { + "epoch": 8.663157894736843, + "grad_norm": 1.587369979461073e-06, + "learning_rate": 0.0001828421052631579, + "logits/chosen": 13.131707191467285, + "logits/rejected": 13.131707191467285, + "logps/chosen": -3752.439453125, + "logps/rejected": -3752.439453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.3256530761719, + "rewards/margins": 0.0, + "rewards/rejected": -372.3256530761719, + "step": 823 + }, + { + "epoch": 8.673684210526316, + "grad_norm": 3.828426542895613e-06, + "learning_rate": 0.00018282105263157895, + "logits/chosen": 13.185013771057129, + "logits/rejected": 13.185013771057129, + "logps/chosen": -4871.19189453125, + "logps/rejected": -4871.19189453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.27203369140625, + "rewards/margins": 0.0, + "rewards/rejected": -484.27203369140625, + "step": 824 + }, + { + "epoch": 8.68421052631579, + "grad_norm": 6.832236067566555e-06, + "learning_rate": 0.00018280000000000003, + "logits/chosen": 13.153717041015625, + "logits/rejected": 13.153717041015625, + "logps/chosen": -3991.80078125, + "logps/rejected": -3991.80078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.3527526855469, + "rewards/margins": 0.0, + "rewards/rejected": -396.3527526855469, + "step": 825 + }, + { + "epoch": 8.694736842105263, + "grad_norm": 1.7575574702277663e-06, + "learning_rate": 0.00018277894736842107, + "logits/chosen": 13.152008056640625, + "logits/rejected": 13.152008056640625, + "logps/chosen": -3993.4921875, + "logps/rejected": -3993.4921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.52191162109375, + "rewards/margins": 0.0, + "rewards/rejected": -396.52191162109375, + "step": 826 + }, + { + "epoch": 8.705263157894738, + "grad_norm": 9.150597179541364e-06, + "learning_rate": 0.0001827578947368421, + "logits/chosen": 13.180081367492676, + "logits/rejected": 13.180081367492676, + "logps/chosen": -4872.33642578125, + "logps/rejected": -4872.33642578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.386474609375, + "rewards/margins": 0.0, + "rewards/rejected": -484.386474609375, + "step": 827 + }, + { + "epoch": 8.715789473684211, + "grad_norm": 2.8743502298311796e-06, + "learning_rate": 0.00018273684210526317, + "logits/chosen": 13.176284790039062, + "logits/rejected": 13.176284790039062, + "logps/chosen": -4320.876953125, + "logps/rejected": -4320.876953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -428.90283203125, + "rewards/margins": 0.0, + "rewards/rejected": -428.90283203125, + "step": 828 + }, + { + "epoch": 8.726315789473684, + "grad_norm": 3.557847776392009e-06, + "learning_rate": 0.00018271578947368422, + "logits/chosen": 13.146232604980469, + "logits/rejected": 13.146232604980469, + "logps/chosen": -2669.1416015625, + "logps/rejected": -2669.1416015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.132568359375, + "rewards/margins": 0.0, + "rewards/rejected": -264.132568359375, + "step": 829 + }, + { + "epoch": 8.736842105263158, + "grad_norm": 3.86333886126522e-06, + "learning_rate": 0.00018269473684210527, + "logits/chosen": 13.152488708496094, + "logits/rejected": 13.152488708496094, + "logps/chosen": -2669.3154296875, + "logps/rejected": -2669.3154296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.14996337890625, + "rewards/margins": 0.0, + "rewards/rejected": -264.14996337890625, + "step": 830 + }, + { + "epoch": 8.74736842105263, + "grad_norm": 1.5854774346735212e-06, + "learning_rate": 0.00018267368421052632, + "logits/chosen": 13.175165176391602, + "logits/rejected": 13.175165176391602, + "logps/chosen": -3755.2841796875, + "logps/rejected": -3755.2841796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.610107421875, + "rewards/margins": 0.0, + "rewards/rejected": -372.610107421875, + "step": 831 + }, + { + "epoch": 8.757894736842106, + "grad_norm": 3.911684416380012e-06, + "learning_rate": 0.0001826526315789474, + "logits/chosen": 13.164422988891602, + "logits/rejected": 13.164422988891602, + "logps/chosen": -4283.81982421875, + "logps/rejected": -4283.81982421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.5846252441406, + "rewards/margins": 0.0, + "rewards/rejected": -425.5846252441406, + "step": 832 + }, + { + "epoch": 8.76842105263158, + "grad_norm": 2.929838956333697e-06, + "learning_rate": 0.00018263157894736842, + "logits/chosen": 13.160375595092773, + "logits/rejected": 13.160375595092773, + "logps/chosen": -3775.306640625, + "logps/rejected": -3775.306640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.66156005859375, + "rewards/margins": 0.0, + "rewards/rejected": -374.66156005859375, + "step": 833 + }, + { + "epoch": 8.778947368421052, + "grad_norm": 1.0393248430773383e-06, + "learning_rate": 0.00018261052631578947, + "logits/chosen": 13.15914249420166, + "logits/rejected": 13.15914249420166, + "logps/chosen": -2672.3173828125, + "logps/rejected": -2672.3173828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4501647949219, + "rewards/margins": 0.0, + "rewards/rejected": -264.4501647949219, + "step": 834 + }, + { + "epoch": 8.789473684210526, + "grad_norm": 2.0899205992463976e-06, + "learning_rate": 0.00018258947368421054, + "logits/chosen": 13.212599754333496, + "logits/rejected": 13.212599754333496, + "logps/chosen": -4877.4931640625, + "logps/rejected": -4877.4931640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.90216064453125, + "rewards/margins": 0.0, + "rewards/rejected": -484.90216064453125, + "step": 835 + }, + { + "epoch": 8.8, + "grad_norm": 2.8235390345798805e-06, + "learning_rate": 0.0001825684210526316, + "logits/chosen": 13.169595718383789, + "logits/rejected": 13.169595718383789, + "logps/chosen": -3997.38671875, + "logps/rejected": -3997.38671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9113464355469, + "rewards/margins": 0.0, + "rewards/rejected": -396.9113464355469, + "step": 836 + }, + { + "epoch": 8.810526315789474, + "grad_norm": 1.5295136108761653e-06, + "learning_rate": 0.00018254736842105264, + "logits/chosen": 13.15715503692627, + "logits/rejected": 13.15715503692627, + "logps/chosen": -4286.63330078125, + "logps/rejected": -4286.63330078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.865966796875, + "rewards/margins": 0.0, + "rewards/rejected": -425.865966796875, + "step": 837 + }, + { + "epoch": 8.821052631578947, + "grad_norm": 1.4351624031405663e-06, + "learning_rate": 0.0001825263157894737, + "logits/chosen": 13.143031120300293, + "logits/rejected": 13.143031120300293, + "logps/chosen": -4287.49609375, + "logps/rejected": -4287.49609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9522399902344, + "rewards/margins": 0.0, + "rewards/rejected": -425.9522399902344, + "step": 838 + }, + { + "epoch": 8.83157894736842, + "grad_norm": 2.3897052869870095e-06, + "learning_rate": 0.00018250526315789474, + "logits/chosen": 13.192338943481445, + "logits/rejected": 13.192338943481445, + "logps/chosen": -5168.6064453125, + "logps/rejected": -5168.6064453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -513.9222412109375, + "rewards/margins": 0.0, + "rewards/rejected": -513.9222412109375, + "step": 839 + }, + { + "epoch": 8.842105263157894, + "grad_norm": 2.318000269951881e-06, + "learning_rate": 0.0001824842105263158, + "logits/chosen": 13.167464256286621, + "logits/rejected": 13.167464256286621, + "logps/chosen": -4878.60498046875, + "logps/rejected": -4878.60498046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0133361816406, + "rewards/margins": 0.0, + "rewards/rejected": -485.0133361816406, + "step": 840 + }, + { + "epoch": 8.852631578947369, + "grad_norm": 2.030115410889266e-06, + "learning_rate": 0.00018246315789473684, + "logits/chosen": 13.184605598449707, + "logits/rejected": 13.184605598449707, + "logps/chosen": -5169.498046875, + "logps/rejected": -5169.498046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.0114135742188, + "rewards/margins": 0.0, + "rewards/rejected": -514.0114135742188, + "step": 841 + }, + { + "epoch": 8.863157894736842, + "grad_norm": 2.633723170220037e-06, + "learning_rate": 0.0001824421052631579, + "logits/chosen": 13.164407730102539, + "logits/rejected": 13.164407730102539, + "logps/chosen": -4879.04052734375, + "logps/rejected": -4879.04052734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.056884765625, + "rewards/margins": 0.0, + "rewards/rejected": -485.056884765625, + "step": 842 + }, + { + "epoch": 8.873684210526315, + "grad_norm": 1.2787404557457194e-06, + "learning_rate": 0.00018242105263157897, + "logits/chosen": 13.122583389282227, + "logits/rejected": 13.122583389282227, + "logps/chosen": -4289.45849609375, + "logps/rejected": -4289.45849609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.14849853515625, + "rewards/margins": 0.0, + "rewards/rejected": -426.14849853515625, + "step": 843 + }, + { + "epoch": 8.884210526315789, + "grad_norm": 1.4480827985607903e-06, + "learning_rate": 0.00018240000000000002, + "logits/chosen": 13.151036262512207, + "logits/rejected": 13.151036262512207, + "logps/chosen": -4324.541015625, + "logps/rejected": -4324.541015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.2692565917969, + "rewards/margins": 0.0, + "rewards/rejected": -429.2692565917969, + "step": 844 + }, + { + "epoch": 8.894736842105264, + "grad_norm": 1.2705459084827453e-06, + "learning_rate": 0.00018237894736842106, + "logits/chosen": 13.122919082641602, + "logits/rejected": 13.122919082641602, + "logps/chosen": -3539.318359375, + "logps/rejected": -3539.318359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.9059753417969, + "rewards/margins": 0.0, + "rewards/rejected": -350.9059753417969, + "step": 845 + }, + { + "epoch": 8.905263157894737, + "grad_norm": 1.544367364658683e-06, + "learning_rate": 0.00018235789473684211, + "logits/chosen": 13.173503875732422, + "logits/rejected": 13.173503875732422, + "logps/chosen": -4880.1103515625, + "logps/rejected": -4880.1103515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.16387939453125, + "rewards/margins": 0.0, + "rewards/rejected": -485.16387939453125, + "step": 846 + }, + { + "epoch": 8.91578947368421, + "grad_norm": 1.495149717811728e-06, + "learning_rate": 0.00018233684210526316, + "logits/chosen": 13.166247367858887, + "logits/rejected": 13.166247367858887, + "logps/chosen": -4325.126953125, + "logps/rejected": -4325.126953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.3278503417969, + "rewards/margins": 0.0, + "rewards/rejected": -429.3278503417969, + "step": 847 + }, + { + "epoch": 8.926315789473684, + "grad_norm": 1.0864290516110486e-06, + "learning_rate": 0.0001823157894736842, + "logits/chosen": 13.142928123474121, + "logits/rejected": 13.142928123474121, + "logps/chosen": -3539.875, + "logps/rejected": -3539.875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.9616394042969, + "rewards/margins": 0.0, + "rewards/rejected": -350.9616394042969, + "step": 848 + }, + { + "epoch": 8.936842105263159, + "grad_norm": 1.004168666440819e-06, + "learning_rate": 0.00018229473684210526, + "logits/chosen": 13.144794464111328, + "logits/rejected": 13.144794464111328, + "logps/chosen": -2673.27734375, + "logps/rejected": -2673.27734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.546142578125, + "rewards/margins": 0.0, + "rewards/rejected": -264.546142578125, + "step": 849 + }, + { + "epoch": 8.947368421052632, + "grad_norm": 1.0678752460080432e-06, + "learning_rate": 0.00018227368421052634, + "logits/chosen": 13.162507057189941, + "logits/rejected": 13.162507057189941, + "logps/chosen": -3540.509765625, + "logps/rejected": -3540.509765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.0251159667969, + "rewards/margins": 0.0, + "rewards/rejected": -351.0251159667969, + "step": 850 + }, + { + "epoch": 8.947368421052632, + "eval_logits/chosen": 13.206059455871582, + "eval_logits/rejected": 13.206059455871582, + "eval_logps/chosen": -4309.0830078125, + "eval_logps/rejected": -4309.0830078125, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.005126953125, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.005126953125, + "eval_runtime": 4.4669, + "eval_samples_per_second": 2.239, + "eval_steps_per_second": 2.239, + "step": 850 + }, + { + "epoch": 8.957894736842105, + "grad_norm": 9.507299978395167e-07, + "learning_rate": 0.0001822526315789474, + "logits/chosen": 13.165428161621094, + "logits/rejected": 13.165428161621094, + "logps/chosen": -2673.208984375, + "logps/rejected": -2673.208984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.539306640625, + "rewards/margins": 0.0, + "rewards/rejected": -264.539306640625, + "step": 851 + }, + { + "epoch": 8.968421052631578, + "grad_norm": 1.3000028502574423e-06, + "learning_rate": 0.0001822315789473684, + "logits/chosen": 13.173028945922852, + "logits/rejected": 13.173028945922852, + "logps/chosen": -2967.26171875, + "logps/rejected": -2967.26171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9999694824219, + "rewards/margins": 0.0, + "rewards/rejected": -293.9999694824219, + "step": 852 + }, + { + "epoch": 8.978947368421053, + "grad_norm": 1.4566235222446267e-06, + "learning_rate": 0.00018221052631578949, + "logits/chosen": 13.192514419555664, + "logits/rejected": 13.192514419555664, + "logps/chosen": -3541.283203125, + "logps/rejected": -3541.283203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1024475097656, + "rewards/margins": 0.0, + "rewards/rejected": -351.1024475097656, + "step": 853 + }, + { + "epoch": 8.989473684210527, + "grad_norm": 1.726867367324303e-06, + "learning_rate": 0.00018218947368421054, + "logits/chosen": 13.20378589630127, + "logits/rejected": 13.20378589630127, + "logps/chosen": -3995.724609375, + "logps/rejected": -3995.724609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7451477050781, + "rewards/margins": 0.0, + "rewards/rejected": -396.7451477050781, + "step": 854 + }, + { + "epoch": 9.0, + "grad_norm": 1.4313933434095816e-06, + "learning_rate": 0.00018216842105263158, + "logits/chosen": 13.209630012512207, + "logits/rejected": 13.209630012512207, + "logps/chosen": -3996.169921875, + "logps/rejected": -3996.169921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7896728515625, + "rewards/margins": 0.0, + "rewards/rejected": -396.7896728515625, + "step": 855 + }, + { + "epoch": 9.010526315789473, + "grad_norm": 9.583002338331426e-07, + "learning_rate": 0.00018214736842105263, + "logits/chosen": 13.211526870727539, + "logits/rejected": 13.211526870727539, + "logps/chosen": -3542.412109375, + "logps/rejected": -3542.412109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.21533203125, + "rewards/margins": 0.0, + "rewards/rejected": -351.21533203125, + "step": 856 + }, + { + "epoch": 9.021052631578947, + "grad_norm": 1.2720788618025836e-06, + "learning_rate": 0.0001821263157894737, + "logits/chosen": 13.24713134765625, + "logits/rejected": 13.24713134765625, + "logps/chosen": -4327.7177734375, + "logps/rejected": -4327.7177734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.5869140625, + "rewards/margins": 0.0, + "rewards/rejected": -429.5869140625, + "step": 857 + }, + { + "epoch": 9.031578947368422, + "grad_norm": 1.4559635701516527e-06, + "learning_rate": 0.00018210526315789476, + "logits/chosen": 13.207448959350586, + "logits/rejected": 13.207448959350586, + "logps/chosen": -2674.173828125, + "logps/rejected": -2674.173828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.63580322265625, + "rewards/margins": 0.0, + "rewards/rejected": -264.63580322265625, + "step": 858 + }, + { + "epoch": 9.042105263157895, + "grad_norm": 1.3104336176184006e-06, + "learning_rate": 0.00018208421052631578, + "logits/chosen": 13.251220703125, + "logits/rejected": 13.251220703125, + "logps/chosen": -4328.32421875, + "logps/rejected": -4328.32421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.6475524902344, + "rewards/margins": 0.0, + "rewards/rejected": -429.6475524902344, + "step": 859 + }, + { + "epoch": 9.052631578947368, + "grad_norm": 1.0783320476548397e-06, + "learning_rate": 0.00018206315789473686, + "logits/chosen": 13.22087574005127, + "logits/rejected": 13.22087574005127, + "logps/chosen": -3543.7529296875, + "logps/rejected": -3543.7529296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.34942626953125, + "rewards/margins": 0.0, + "rewards/rejected": -351.34942626953125, + "step": 860 + }, + { + "epoch": 9.063157894736841, + "grad_norm": 9.301684826823475e-07, + "learning_rate": 0.0001820421052631579, + "logits/chosen": 13.215072631835938, + "logits/rejected": 13.215072631835938, + "logps/chosen": -2674.900390625, + "logps/rejected": -2674.900390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.7084655761719, + "rewards/margins": 0.0, + "rewards/rejected": -264.7084655761719, + "step": 861 + }, + { + "epoch": 9.073684210526316, + "grad_norm": 1.5434849274242879e-06, + "learning_rate": 0.00018202105263157896, + "logits/chosen": 13.234474182128906, + "logits/rejected": 13.234474182128906, + "logps/chosen": -3757.888671875, + "logps/rejected": -3757.888671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8705749511719, + "rewards/margins": 0.0, + "rewards/rejected": -372.8705749511719, + "step": 862 + }, + { + "epoch": 9.08421052631579, + "grad_norm": 6.232121904758969e-06, + "learning_rate": 0.000182, + "logits/chosen": 13.29504108428955, + "logits/rejected": 13.29504108428955, + "logps/chosen": -5173.61376953125, + "logps/rejected": -5173.61376953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4229736328125, + "rewards/margins": 0.0, + "rewards/rejected": -514.4229736328125, + "step": 863 + }, + { + "epoch": 9.094736842105263, + "grad_norm": 2.5549634301569313e-06, + "learning_rate": 0.00018197894736842108, + "logits/chosen": 13.307636260986328, + "logits/rejected": 13.307636260986328, + "logps/chosen": -5174.4599609375, + "logps/rejected": -5174.4599609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.507568359375, + "rewards/margins": 0.0, + "rewards/rejected": -514.507568359375, + "step": 864 + }, + { + "epoch": 9.105263157894736, + "grad_norm": 6.885562470415607e-06, + "learning_rate": 0.0001819578947368421, + "logits/chosen": 13.242020606994629, + "logits/rejected": 13.242020606994629, + "logps/chosen": -3543.9609375, + "logps/rejected": -3543.9609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3702087402344, + "rewards/margins": 0.0, + "rewards/rejected": -351.3702087402344, + "step": 865 + }, + { + "epoch": 9.115789473684211, + "grad_norm": 2.926565230154665e-06, + "learning_rate": 0.00018193684210526315, + "logits/chosen": 13.247761726379395, + "logits/rejected": 13.247761726379395, + "logps/chosen": -3545.162109375, + "logps/rejected": -3545.162109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4903259277344, + "rewards/margins": 0.0, + "rewards/rejected": -351.4903259277344, + "step": 866 + }, + { + "epoch": 9.126315789473685, + "grad_norm": 1.8148497247238993e-06, + "learning_rate": 0.00018191578947368423, + "logits/chosen": 13.250940322875977, + "logits/rejected": 13.250940322875977, + "logps/chosen": -3545.8134765625, + "logps/rejected": -3545.8134765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.55548095703125, + "rewards/margins": 0.0, + "rewards/rejected": -351.55548095703125, + "step": 867 + }, + { + "epoch": 9.136842105263158, + "grad_norm": 1.4375851606018841e-05, + "learning_rate": 0.00018189473684210528, + "logits/chosen": 13.320199966430664, + "logits/rejected": 13.320199966430664, + "logps/chosen": -5172.951171875, + "logps/rejected": -5172.951171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.356689453125, + "rewards/margins": 0.0, + "rewards/rejected": -514.356689453125, + "step": 868 + }, + { + "epoch": 9.147368421052631, + "grad_norm": 1.908138074213639e-06, + "learning_rate": 0.00018187368421052633, + "logits/chosen": 13.347817420959473, + "logits/rejected": 13.347817420959473, + "logps/chosen": -5176.21923828125, + "logps/rejected": -5176.21923828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6835327148438, + "rewards/margins": 0.0, + "rewards/rejected": -514.6835327148438, + "step": 869 + }, + { + "epoch": 9.157894736842104, + "grad_norm": 1.8556129361968488e-05, + "learning_rate": 0.00018185263157894738, + "logits/chosen": 13.335928916931152, + "logits/rejected": 13.335928916931152, + "logps/chosen": -4871.64404296875, + "logps/rejected": -4871.64404296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.3172302246094, + "rewards/margins": 0.0, + "rewards/rejected": -484.3172302246094, + "step": 870 + }, + { + "epoch": 9.16842105263158, + "grad_norm": 2.607876240290352e-06, + "learning_rate": 0.00018183157894736843, + "logits/chosen": 13.350255012512207, + "logits/rejected": 13.350255012512207, + "logps/chosen": -5177.1142578125, + "logps/rejected": -5177.1142578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7730102539062, + "rewards/margins": 0.0, + "rewards/rejected": -514.7730102539062, + "step": 871 + }, + { + "epoch": 9.178947368421053, + "grad_norm": 5.983564733469393e-06, + "learning_rate": 0.00018181052631578948, + "logits/chosen": 13.271990776062012, + "logits/rejected": 13.271990776062012, + "logps/chosen": -3545.2451171875, + "logps/rejected": -3545.2451171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4986267089844, + "rewards/margins": 0.0, + "rewards/rejected": -351.4986267089844, + "step": 872 + }, + { + "epoch": 9.189473684210526, + "grad_norm": 7.573837137897499e-06, + "learning_rate": 0.00018178947368421053, + "logits/chosen": 13.269919395446777, + "logits/rejected": 13.269919395446777, + "logps/chosen": -3544.470703125, + "logps/rejected": -3544.470703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.42120361328125, + "rewards/margins": 0.0, + "rewards/rejected": -351.42120361328125, + "step": 873 + }, + { + "epoch": 9.2, + "grad_norm": 2.123792000929825e-06, + "learning_rate": 0.00018176842105263157, + "logits/chosen": 13.277111053466797, + "logits/rejected": 13.277111053466797, + "logps/chosen": -2673.2978515625, + "logps/rejected": -2673.2978515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5481872558594, + "rewards/margins": 0.0, + "rewards/rejected": -264.5481872558594, + "step": 874 + }, + { + "epoch": 9.210526315789474, + "grad_norm": 2.9013976927672047e-06, + "learning_rate": 0.00018174736842105265, + "logits/chosen": 13.341736793518066, + "logits/rejected": 13.341736793518066, + "logps/chosen": -4875.658203125, + "logps/rejected": -4875.658203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.7186584472656, + "rewards/margins": 0.0, + "rewards/rejected": -484.7186584472656, + "step": 875 + }, + { + "epoch": 9.221052631578948, + "grad_norm": 3.285511411377229e-06, + "learning_rate": 0.0001817263157894737, + "logits/chosen": 13.295886993408203, + "logits/rejected": 13.295886993408203, + "logps/chosen": -2966.7255859375, + "logps/rejected": -2966.7255859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9463806152344, + "rewards/margins": 0.0, + "rewards/rejected": -293.9463806152344, + "step": 876 + }, + { + "epoch": 9.23157894736842, + "grad_norm": 6.2026820160099305e-06, + "learning_rate": 0.00018170526315789475, + "logits/chosen": 13.3085298538208, + "logits/rejected": 13.3085298538208, + "logps/chosen": -3771.986328125, + "logps/rejected": -3771.986328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.32952880859375, + "rewards/margins": 0.0, + "rewards/rejected": -374.32952880859375, + "step": 877 + }, + { + "epoch": 9.242105263157894, + "grad_norm": 3.626013949542539e-06, + "learning_rate": 0.0001816842105263158, + "logits/chosen": 13.35233211517334, + "logits/rejected": 13.35233211517334, + "logps/chosen": -4329.10546875, + "logps/rejected": -4329.10546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.7256774902344, + "rewards/margins": 0.0, + "rewards/rejected": -429.7256774902344, + "step": 878 + }, + { + "epoch": 9.25263157894737, + "grad_norm": 2.1689302229788154e-06, + "learning_rate": 0.00018166315789473685, + "logits/chosen": 13.316628456115723, + "logits/rejected": 13.316628456115723, + "logps/chosen": -3992.447265625, + "logps/rejected": -3992.447265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.41741943359375, + "rewards/margins": 0.0, + "rewards/rejected": -396.41741943359375, + "step": 879 + }, + { + "epoch": 9.263157894736842, + "grad_norm": 6.798334652557969e-06, + "learning_rate": 0.0001816421052631579, + "logits/chosen": 13.310672760009766, + "logits/rejected": 13.310672760009766, + "logps/chosen": -4283.88916015625, + "logps/rejected": -4283.88916015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.591552734375, + "rewards/margins": 0.0, + "rewards/rejected": -425.591552734375, + "step": 880 + }, + { + "epoch": 9.273684210526316, + "grad_norm": 4.215072749502724e-06, + "learning_rate": 0.00018162105263157895, + "logits/chosen": 13.305066108703613, + "logits/rejected": 13.305066108703613, + "logps/chosen": -3992.12109375, + "logps/rejected": -3992.12109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.3847961425781, + "rewards/margins": 0.0, + "rewards/rejected": -396.3847961425781, + "step": 881 + }, + { + "epoch": 9.284210526315789, + "grad_norm": 1.9807341686828295e-06, + "learning_rate": 0.00018160000000000002, + "logits/chosen": 13.34605598449707, + "logits/rejected": 13.34605598449707, + "logps/chosen": -4876.4375, + "logps/rejected": -4876.4375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.7966003417969, + "rewards/margins": 0.0, + "rewards/rejected": -484.7966003417969, + "step": 882 + }, + { + "epoch": 9.294736842105262, + "grad_norm": 4.367466772237094e-06, + "learning_rate": 0.00018157894736842107, + "logits/chosen": 13.340597152709961, + "logits/rejected": 13.340597152709961, + "logps/chosen": -4876.533203125, + "logps/rejected": -4876.533203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.80615234375, + "rewards/margins": 0.0, + "rewards/rejected": -484.80615234375, + "step": 883 + }, + { + "epoch": 9.305263157894737, + "grad_norm": 5.206487912801094e-06, + "learning_rate": 0.0001815578947368421, + "logits/chosen": 13.27581787109375, + "logits/rejected": 13.27581787109375, + "logps/chosen": -3993.515625, + "logps/rejected": -3993.515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.5242614746094, + "rewards/margins": 0.0, + "rewards/rejected": -396.5242614746094, + "step": 884 + }, + { + "epoch": 9.31578947368421, + "grad_norm": 2.3805457658454543e-06, + "learning_rate": 0.00018153684210526317, + "logits/chosen": 13.257878303527832, + "logits/rejected": 13.257878303527832, + "logps/chosen": -3545.986328125, + "logps/rejected": -3545.986328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.57275390625, + "rewards/margins": 0.0, + "rewards/rejected": -351.57275390625, + "step": 885 + }, + { + "epoch": 9.326315789473684, + "grad_norm": 4.452103894436732e-06, + "learning_rate": 0.00018151578947368422, + "logits/chosen": 13.283812522888184, + "logits/rejected": 13.283812522888184, + "logps/chosen": -4877.609375, + "logps/rejected": -4877.609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9137878417969, + "rewards/margins": 0.0, + "rewards/rejected": -484.9137878417969, + "step": 886 + }, + { + "epoch": 9.336842105263157, + "grad_norm": 3.1537988434138242e-06, + "learning_rate": 0.00018149473684210527, + "logits/chosen": 13.224468231201172, + "logits/rejected": 13.224468231201172, + "logps/chosen": -3995.57421875, + "logps/rejected": -3995.57421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7301025390625, + "rewards/margins": 0.0, + "rewards/rejected": -396.7301025390625, + "step": 887 + }, + { + "epoch": 9.347368421052632, + "grad_norm": 3.684247531055007e-06, + "learning_rate": 0.00018147368421052632, + "logits/chosen": 13.248326301574707, + "logits/rejected": 13.248326301574707, + "logps/chosen": -4327.654296875, + "logps/rejected": -4327.654296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.58056640625, + "rewards/margins": 0.0, + "rewards/rejected": -429.58056640625, + "step": 888 + }, + { + "epoch": 9.357894736842105, + "grad_norm": 1.287758095713798e-06, + "learning_rate": 0.0001814526315789474, + "logits/chosen": 13.253173828125, + "logits/rejected": 13.253173828125, + "logps/chosen": -4878.9853515625, + "logps/rejected": -4878.9853515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0513610839844, + "rewards/margins": 0.0, + "rewards/rejected": -485.0513610839844, + "step": 889 + }, + { + "epoch": 9.368421052631579, + "grad_norm": 3.224185547878733e-06, + "learning_rate": 0.00018143157894736842, + "logits/chosen": 13.204877853393555, + "logits/rejected": 13.204877853393555, + "logps/chosen": -3756.822265625, + "logps/rejected": -3756.822265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.763916015625, + "rewards/margins": 0.0, + "rewards/rejected": -372.763916015625, + "step": 890 + }, + { + "epoch": 9.378947368421052, + "grad_norm": 4.099476427654736e-06, + "learning_rate": 0.00018141052631578947, + "logits/chosen": 13.187684059143066, + "logits/rejected": 13.187684059143066, + "logps/chosen": -3543.75, + "logps/rejected": -3543.75, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.34912109375, + "rewards/margins": 0.0, + "rewards/rejected": -351.34912109375, + "step": 891 + }, + { + "epoch": 9.389473684210527, + "grad_norm": 1.3864772654414992e-06, + "learning_rate": 0.00018138947368421054, + "logits/chosen": 13.17573070526123, + "logits/rejected": 13.17573070526123, + "logps/chosen": -2967.041015625, + "logps/rejected": -2967.041015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9779052734375, + "rewards/margins": 0.0, + "rewards/rejected": -293.9779052734375, + "step": 892 + }, + { + "epoch": 9.4, + "grad_norm": 2.9899169931013603e-06, + "learning_rate": 0.0001813684210526316, + "logits/chosen": 13.222283363342285, + "logits/rejected": 13.222283363342285, + "logps/chosen": -4880.19873046875, + "logps/rejected": -4880.19873046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1726989746094, + "rewards/margins": 0.0, + "rewards/rejected": -485.1726989746094, + "step": 893 + }, + { + "epoch": 9.410526315789474, + "grad_norm": 2.495072294550482e-06, + "learning_rate": 0.00018134736842105264, + "logits/chosen": 13.171360969543457, + "logits/rejected": 13.171360969543457, + "logps/chosen": -3776.3671875, + "logps/rejected": -3776.3671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7676086425781, + "rewards/margins": 0.0, + "rewards/rejected": -374.7676086425781, + "step": 894 + }, + { + "epoch": 9.421052631578947, + "grad_norm": 8.472140393678274e-07, + "learning_rate": 0.0001813263157894737, + "logits/chosen": 13.171323776245117, + "logits/rejected": 13.171323776245117, + "logps/chosen": -2672.482421875, + "logps/rejected": -2672.482421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4666442871094, + "rewards/margins": 0.0, + "rewards/rejected": -264.4666442871094, + "step": 895 + }, + { + "epoch": 9.431578947368422, + "grad_norm": 8.538849556316563e-07, + "learning_rate": 0.00018130526315789477, + "logits/chosen": 13.17543888092041, + "logits/rejected": 13.17543888092041, + "logps/chosen": -2672.521484375, + "logps/rejected": -2672.521484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4705505371094, + "rewards/margins": 0.0, + "rewards/rejected": -264.4705505371094, + "step": 896 + }, + { + "epoch": 9.442105263157895, + "grad_norm": 9.238790994459123e-07, + "learning_rate": 0.0001812842105263158, + "logits/chosen": 13.184248924255371, + "logits/rejected": 13.184248924255371, + "logps/chosen": -2967.34375, + "logps/rejected": -2967.34375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0081787109375, + "rewards/margins": 0.0, + "rewards/rejected": -294.0081787109375, + "step": 897 + }, + { + "epoch": 9.452631578947368, + "grad_norm": 2.0927225250488846e-06, + "learning_rate": 0.00018126315789473684, + "logits/chosen": 13.237820625305176, + "logits/rejected": 13.237820625305176, + "logps/chosen": -4881.296875, + "logps/rejected": -4881.296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.28253173828125, + "rewards/margins": 0.0, + "rewards/rejected": -485.28253173828125, + "step": 898 + }, + { + "epoch": 9.463157894736842, + "grad_norm": 1.3098076578899054e-06, + "learning_rate": 0.00018124210526315791, + "logits/chosen": 13.197992324829102, + "logits/rejected": 13.197992324829102, + "logps/chosen": -3542.921875, + "logps/rejected": -3542.921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2663269042969, + "rewards/margins": 0.0, + "rewards/rejected": -351.2663269042969, + "step": 899 + }, + { + "epoch": 9.473684210526315, + "grad_norm": 1.5872328731347807e-06, + "learning_rate": 0.00018122105263157896, + "logits/chosen": 13.200522422790527, + "logits/rejected": 13.200522422790527, + "logps/chosen": -3998.515625, + "logps/rejected": -3998.515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0242614746094, + "rewards/margins": 0.0, + "rewards/rejected": -397.0242614746094, + "step": 900 + }, + { + "epoch": 9.473684210526315, + "eval_logits/chosen": 13.234708786010742, + "eval_logits/rejected": 13.234708786010742, + "eval_logps/chosen": -4309.9384765625, + "eval_logps/rejected": -4309.9384765625, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.0907287597656, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.0907287597656, + "eval_runtime": 4.3359, + "eval_samples_per_second": 2.306, + "eval_steps_per_second": 2.306, + "step": 900 + }, + { + "epoch": 9.48421052631579, + "grad_norm": 1.831944700825261e-06, + "learning_rate": 0.0001812, + "logits/chosen": 13.205326080322266, + "logits/rejected": 13.205326080322266, + "logps/chosen": -3542.62109375, + "logps/rejected": -3542.62109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2362365722656, + "rewards/margins": 0.0, + "rewards/rejected": -351.2362365722656, + "step": 901 + }, + { + "epoch": 9.494736842105263, + "grad_norm": 1.1897652711923001e-06, + "learning_rate": 0.00018117894736842106, + "logits/chosen": 13.196013450622559, + "logits/rejected": 13.196013450622559, + "logps/chosen": -2673.1015625, + "logps/rejected": -2673.1015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.528564453125, + "rewards/margins": 0.0, + "rewards/rejected": -264.528564453125, + "step": 902 + }, + { + "epoch": 9.505263157894737, + "grad_norm": 8.924276357902272e-07, + "learning_rate": 0.0001811578947368421, + "logits/chosen": 13.195990562438965, + "logits/rejected": 13.195990562438965, + "logps/chosen": -2673.314453125, + "logps/rejected": -2673.314453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.54986572265625, + "rewards/margins": 0.0, + "rewards/rejected": -264.54986572265625, + "step": 903 + }, + { + "epoch": 9.51578947368421, + "grad_norm": 1.743557845657051e-06, + "learning_rate": 0.00018113684210526316, + "logits/chosen": 13.210175514221191, + "logits/rejected": 13.210175514221191, + "logps/chosen": -3757.796875, + "logps/rejected": -3757.796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.86138916015625, + "rewards/margins": 0.0, + "rewards/rejected": -372.86138916015625, + "step": 904 + }, + { + "epoch": 9.526315789473685, + "grad_norm": 1.0627876463331631e-06, + "learning_rate": 0.0001811157894736842, + "logits/chosen": 13.201913833618164, + "logits/rejected": 13.201913833618164, + "logps/chosen": -3542.98046875, + "logps/rejected": -3542.98046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2721862792969, + "rewards/margins": 0.0, + "rewards/rejected": -351.2721862792969, + "step": 905 + }, + { + "epoch": 9.536842105263158, + "grad_norm": 2.079559862977476e-06, + "learning_rate": 0.00018109473684210526, + "logits/chosen": 13.196248054504395, + "logits/rejected": 13.196248054504395, + "logps/chosen": -3779.36328125, + "logps/rejected": -3779.36328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.0672302246094, + "rewards/margins": 0.0, + "rewards/rejected": -375.0672302246094, + "step": 906 + }, + { + "epoch": 9.547368421052632, + "grad_norm": 1.8719908894127002e-06, + "learning_rate": 0.00018107368421052634, + "logits/chosen": 13.248462677001953, + "logits/rejected": 13.248462677001953, + "logps/chosen": -4881.23779296875, + "logps/rejected": -4881.23779296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.276611328125, + "rewards/margins": 0.0, + "rewards/rejected": -485.276611328125, + "step": 907 + }, + { + "epoch": 9.557894736842105, + "grad_norm": 1.5655275547032943e-06, + "learning_rate": 0.00018105263157894739, + "logits/chosen": 13.20699405670166, + "logits/rejected": 13.20699405670166, + "logps/chosen": -4287.06298828125, + "logps/rejected": -4287.06298828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.908935546875, + "rewards/margins": 0.0, + "rewards/rejected": -425.908935546875, + "step": 908 + }, + { + "epoch": 9.568421052631578, + "grad_norm": 1.6612819990768912e-06, + "learning_rate": 0.0001810315789473684, + "logits/chosen": 13.194329261779785, + "logits/rejected": 13.194329261779785, + "logps/chosen": -2674.3115234375, + "logps/rejected": -2674.3115234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6495666503906, + "rewards/margins": 0.0, + "rewards/rejected": -264.6495666503906, + "step": 909 + }, + { + "epoch": 9.578947368421053, + "grad_norm": 5.043406417826191e-06, + "learning_rate": 0.00018101052631578948, + "logits/chosen": 13.264094352722168, + "logits/rejected": 13.264094352722168, + "logps/chosen": -5171.0244140625, + "logps/rejected": -5171.0244140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.1640014648438, + "rewards/margins": 0.0, + "rewards/rejected": -514.1640014648438, + "step": 910 + }, + { + "epoch": 9.589473684210526, + "grad_norm": 1.3647395462612621e-06, + "learning_rate": 0.00018098947368421053, + "logits/chosen": 13.19222354888916, + "logits/rejected": 13.19222354888916, + "logps/chosen": -3998.6875, + "logps/rejected": -3998.6875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.04144287109375, + "rewards/margins": 0.0, + "rewards/rejected": -397.04144287109375, + "step": 911 + }, + { + "epoch": 9.6, + "grad_norm": 3.274992650403874e-06, + "learning_rate": 0.00018096842105263158, + "logits/chosen": 13.244412422180176, + "logits/rejected": 13.244412422180176, + "logps/chosen": -5171.88427734375, + "logps/rejected": -5171.88427734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.25, + "rewards/margins": 0.0, + "rewards/rejected": -514.25, + "step": 912 + }, + { + "epoch": 9.610526315789473, + "grad_norm": 1.3367305200517876e-06, + "learning_rate": 0.00018094736842105263, + "logits/chosen": 13.1802978515625, + "logits/rejected": 13.1802978515625, + "logps/chosen": -3543.05859375, + "logps/rejected": -3543.05859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2799987792969, + "rewards/margins": 0.0, + "rewards/rejected": -351.2799987792969, + "step": 913 + }, + { + "epoch": 9.621052631578948, + "grad_norm": 2.5271988306485582e-06, + "learning_rate": 0.0001809263157894737, + "logits/chosen": 13.175594329833984, + "logits/rejected": 13.175594329833984, + "logps/chosen": -3998.55078125, + "logps/rejected": -3998.55078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.02777099609375, + "rewards/margins": 0.0, + "rewards/rejected": -397.02777099609375, + "step": 914 + }, + { + "epoch": 9.631578947368421, + "grad_norm": 1.7182225064971135e-06, + "learning_rate": 0.00018090526315789476, + "logits/chosen": 13.172041893005371, + "logits/rejected": 13.172041893005371, + "logps/chosen": -3998.62109375, + "logps/rejected": -3998.62109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0347900390625, + "rewards/margins": 0.0, + "rewards/rejected": -397.0347900390625, + "step": 915 + }, + { + "epoch": 9.642105263157895, + "grad_norm": 8.808316351860412e-07, + "learning_rate": 0.00018088421052631578, + "logits/chosen": 13.164517402648926, + "logits/rejected": 13.164517402648926, + "logps/chosen": -2968.2216796875, + "logps/rejected": -2968.2216796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0959777832031, + "rewards/margins": 0.0, + "rewards/rejected": -294.0959777832031, + "step": 916 + }, + { + "epoch": 9.652631578947368, + "grad_norm": 1.9230894849897595e-06, + "learning_rate": 0.00018086315789473686, + "logits/chosen": 13.156488418579102, + "logits/rejected": 13.156488418579102, + "logps/chosen": -3998.646484375, + "logps/rejected": -3998.646484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0373229980469, + "rewards/margins": 0.0, + "rewards/rejected": -397.0373229980469, + "step": 917 + }, + { + "epoch": 9.663157894736843, + "grad_norm": 2.5991857910412364e-06, + "learning_rate": 0.0001808421052631579, + "logits/chosen": 13.193648338317871, + "logits/rejected": 13.193648338317871, + "logps/chosen": -4880.2578125, + "logps/rejected": -4880.2578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1786193847656, + "rewards/margins": 0.0, + "rewards/rejected": -485.1786193847656, + "step": 918 + }, + { + "epoch": 9.673684210526316, + "grad_norm": 1.4714457847730955e-06, + "learning_rate": 0.00018082105263157895, + "logits/chosen": 13.170114517211914, + "logits/rejected": 13.170114517211914, + "logps/chosen": -4325.29296875, + "logps/rejected": -4325.29296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.3444519042969, + "rewards/margins": 0.0, + "rewards/rejected": -429.3444519042969, + "step": 919 + }, + { + "epoch": 9.68421052631579, + "grad_norm": 1.1151461194458534e-06, + "learning_rate": 0.0001808, + "logits/chosen": 13.122346878051758, + "logits/rejected": 13.122346878051758, + "logps/chosen": -3999.84765625, + "logps/rejected": -3999.84765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1574401855469, + "rewards/margins": 0.0, + "rewards/rejected": -397.1574401855469, + "step": 920 + }, + { + "epoch": 9.694736842105263, + "grad_norm": 1.959126848305459e-06, + "learning_rate": 0.00018077894736842108, + "logits/chosen": 13.17595100402832, + "logits/rejected": 13.17595100402832, + "logps/chosen": -5173.25, + "logps/rejected": -5173.25, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3865966796875, + "rewards/margins": 0.0, + "rewards/rejected": -514.3865966796875, + "step": 921 + }, + { + "epoch": 9.705263157894738, + "grad_norm": 1.5191129705272033e-06, + "learning_rate": 0.0001807578947368421, + "logits/chosen": 13.107925415039062, + "logits/rejected": 13.107925415039062, + "logps/chosen": -4000.228515625, + "logps/rejected": -4000.228515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1955261230469, + "rewards/margins": 0.0, + "rewards/rejected": -397.1955261230469, + "step": 922 + }, + { + "epoch": 9.715789473684211, + "grad_norm": 8.850959147821413e-07, + "learning_rate": 0.00018073684210526315, + "logits/chosen": 13.103530883789062, + "logits/rejected": 13.103530883789062, + "logps/chosen": -2967.771484375, + "logps/rejected": -2967.771484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.05096435546875, + "rewards/margins": 0.0, + "rewards/rejected": -294.05096435546875, + "step": 923 + }, + { + "epoch": 9.726315789473684, + "grad_norm": 1.1583940704440465e-06, + "learning_rate": 0.00018071578947368423, + "logits/chosen": 13.108007431030273, + "logits/rejected": 13.108007431030273, + "logps/chosen": -3758.005859375, + "logps/rejected": -3758.005859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8822937011719, + "rewards/margins": 0.0, + "rewards/rejected": -372.8822937011719, + "step": 924 + }, + { + "epoch": 9.736842105263158, + "grad_norm": 1.7935511777977808e-06, + "learning_rate": 0.00018069473684210528, + "logits/chosen": 13.160980224609375, + "logits/rejected": 13.160980224609375, + "logps/chosen": -5173.95361328125, + "logps/rejected": -5173.95361328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4569702148438, + "rewards/margins": 0.0, + "rewards/rejected": -514.4569702148438, + "step": 925 + }, + { + "epoch": 9.74736842105263, + "grad_norm": 1.3837476444678032e-06, + "learning_rate": 0.00018067368421052633, + "logits/chosen": 13.14988899230957, + "logits/rejected": 13.14988899230957, + "logps/chosen": -4879.6416015625, + "logps/rejected": -4879.6416015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.11700439453125, + "rewards/margins": 0.0, + "rewards/rejected": -485.11700439453125, + "step": 926 + }, + { + "epoch": 9.757894736842106, + "grad_norm": 1.2462518270695e-06, + "learning_rate": 0.00018065263157894738, + "logits/chosen": 13.096955299377441, + "logits/rejected": 13.096955299377441, + "logps/chosen": -2673.61328125, + "logps/rejected": -2673.61328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5797424316406, + "rewards/margins": 0.0, + "rewards/rejected": -264.5797424316406, + "step": 927 + }, + { + "epoch": 9.76842105263158, + "grad_norm": 1.6706468386473716e-06, + "learning_rate": 0.00018063157894736845, + "logits/chosen": 13.113860130310059, + "logits/rejected": 13.113860130310059, + "logps/chosen": -4287.2919921875, + "logps/rejected": -4287.2919921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9318542480469, + "rewards/margins": 0.0, + "rewards/rejected": -425.9318542480469, + "step": 928 + }, + { + "epoch": 9.778947368421052, + "grad_norm": 1.5169850939855678e-06, + "learning_rate": 0.00018061052631578947, + "logits/chosen": 13.162243843078613, + "logits/rejected": 13.162243843078613, + "logps/chosen": -4879.82958984375, + "logps/rejected": -4879.82958984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.13580322265625, + "rewards/margins": 0.0, + "rewards/rejected": -485.13580322265625, + "step": 929 + }, + { + "epoch": 9.789473684210526, + "grad_norm": 1.4269896837504348e-06, + "learning_rate": 0.00018058947368421052, + "logits/chosen": 13.114461898803711, + "logits/rejected": 13.114461898803711, + "logps/chosen": -3777.1953125, + "logps/rejected": -3777.1953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8504333496094, + "rewards/margins": 0.0, + "rewards/rejected": -374.8504333496094, + "step": 930 + }, + { + "epoch": 9.8, + "grad_norm": 1.0124748541784356e-06, + "learning_rate": 0.0001805684210526316, + "logits/chosen": 13.113466262817383, + "logits/rejected": 13.113466262817383, + "logps/chosen": -2673.595703125, + "logps/rejected": -2673.595703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5779724121094, + "rewards/margins": 0.0, + "rewards/rejected": -264.5779724121094, + "step": 931 + }, + { + "epoch": 9.810526315789474, + "grad_norm": 1.7347774701192975e-06, + "learning_rate": 0.00018054736842105265, + "logits/chosen": 13.17525863647461, + "logits/rejected": 13.17525863647461, + "logps/chosen": -4880.5234375, + "logps/rejected": -4880.5234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2051696777344, + "rewards/margins": 0.0, + "rewards/rejected": -485.2051696777344, + "step": 932 + }, + { + "epoch": 9.821052631578947, + "grad_norm": 7.927322371870105e-07, + "learning_rate": 0.0001805263157894737, + "logits/chosen": 13.13425064086914, + "logits/rejected": 13.13425064086914, + "logps/chosen": -2967.8828125, + "logps/rejected": -2967.8828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0621032714844, + "rewards/margins": 0.0, + "rewards/rejected": -294.0621032714844, + "step": 933 + }, + { + "epoch": 9.83157894736842, + "grad_norm": 1.0932701570709469e-06, + "learning_rate": 0.00018050526315789475, + "logits/chosen": 13.151144027709961, + "logits/rejected": 13.151144027709961, + "logps/chosen": -3758.41015625, + "logps/rejected": -3758.41015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9226989746094, + "rewards/margins": 0.0, + "rewards/rejected": -372.9226989746094, + "step": 934 + }, + { + "epoch": 9.842105263157894, + "grad_norm": 1.906367288029287e-06, + "learning_rate": 0.0001804842105263158, + "logits/chosen": 13.213872909545898, + "logits/rejected": 13.213872909545898, + "logps/chosen": -5175.31494140625, + "logps/rejected": -5175.31494140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5930786132812, + "rewards/margins": 0.0, + "rewards/rejected": -514.5930786132812, + "step": 935 + }, + { + "epoch": 9.852631578947369, + "grad_norm": 1.2445665333871148e-06, + "learning_rate": 0.00018046315789473685, + "logits/chosen": 13.160307884216309, + "logits/rejected": 13.160307884216309, + "logps/chosen": -3999.84375, + "logps/rejected": -3999.84375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1570739746094, + "rewards/margins": 0.0, + "rewards/rejected": -397.1570739746094, + "step": 936 + }, + { + "epoch": 9.863157894736842, + "grad_norm": 1.4144438864605036e-06, + "learning_rate": 0.0001804421052631579, + "logits/chosen": 13.167046546936035, + "logits/rejected": 13.167046546936035, + "logps/chosen": -3777.84375, + "logps/rejected": -3777.84375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9152526855469, + "rewards/margins": 0.0, + "rewards/rejected": -374.9152526855469, + "step": 937 + }, + { + "epoch": 9.873684210526315, + "grad_norm": 1.6998163800963084e-06, + "learning_rate": 0.00018042105263157894, + "logits/chosen": 13.238704681396484, + "logits/rejected": 13.238704681396484, + "logps/chosen": -5175.4814453125, + "logps/rejected": -5175.4814453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6097412109375, + "rewards/margins": 0.0, + "rewards/rejected": -514.6097412109375, + "step": 938 + }, + { + "epoch": 9.884210526315789, + "grad_norm": 1.5435953173437156e-06, + "learning_rate": 0.00018040000000000002, + "logits/chosen": 13.185133934020996, + "logits/rejected": 13.185133934020996, + "logps/chosen": -4288.92333984375, + "logps/rejected": -4288.92333984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.094970703125, + "rewards/margins": 0.0, + "rewards/rejected": -426.094970703125, + "step": 939 + }, + { + "epoch": 9.894736842105264, + "grad_norm": 1.576958652549365e-06, + "learning_rate": 0.00018037894736842107, + "logits/chosen": 13.179280281066895, + "logits/rejected": 13.179280281066895, + "logps/chosen": -3778.392578125, + "logps/rejected": -3778.392578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.97015380859375, + "rewards/margins": 0.0, + "rewards/rejected": -374.97015380859375, + "step": 940 + }, + { + "epoch": 9.905263157894737, + "grad_norm": 1.4728045698575443e-06, + "learning_rate": 0.0001803578947368421, + "logits/chosen": 13.18721866607666, + "logits/rejected": 13.18721866607666, + "logps/chosen": -4289.30419921875, + "logps/rejected": -4289.30419921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.133056640625, + "rewards/margins": 0.0, + "rewards/rejected": -426.133056640625, + "step": 941 + }, + { + "epoch": 9.91578947368421, + "grad_norm": 1.2911617659483454e-06, + "learning_rate": 0.00018033684210526317, + "logits/chosen": 13.178040504455566, + "logits/rejected": 13.178040504455566, + "logps/chosen": -3999.052734375, + "logps/rejected": -3999.052734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0779724121094, + "rewards/margins": 0.0, + "rewards/rejected": -397.0779724121094, + "step": 942 + }, + { + "epoch": 9.926315789473684, + "grad_norm": 1.1795951877502375e-06, + "learning_rate": 0.00018031578947368422, + "logits/chosen": 13.176817893981934, + "logits/rejected": 13.176817893981934, + "logps/chosen": -3541.595703125, + "logps/rejected": -3541.595703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1336975097656, + "rewards/margins": 0.0, + "rewards/rejected": -351.1336975097656, + "step": 943 + }, + { + "epoch": 9.936842105263159, + "grad_norm": 1.463585476813023e-06, + "learning_rate": 0.00018029473684210527, + "logits/chosen": 13.174623489379883, + "logits/rejected": 13.174623489379883, + "logps/chosen": -4290.2265625, + "logps/rejected": -4290.2265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.2253112792969, + "rewards/margins": 0.0, + "rewards/rejected": -426.2253112792969, + "step": 944 + }, + { + "epoch": 9.947368421052632, + "grad_norm": 1.9414749203860993e-06, + "learning_rate": 0.00018027368421052632, + "logits/chosen": 13.215839385986328, + "logits/rejected": 13.215839385986328, + "logps/chosen": -4880.64794921875, + "logps/rejected": -4880.64794921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2176208496094, + "rewards/margins": 0.0, + "rewards/rejected": -485.2176208496094, + "step": 945 + }, + { + "epoch": 9.957894736842105, + "grad_norm": 1.4988735301812994e-06, + "learning_rate": 0.0001802526315789474, + "logits/chosen": 13.165823936462402, + "logits/rejected": 13.165823936462402, + "logps/chosen": -3779.154296875, + "logps/rejected": -3779.154296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.04632568359375, + "rewards/margins": 0.0, + "rewards/rejected": -375.04632568359375, + "step": 946 + }, + { + "epoch": 9.968421052631578, + "grad_norm": 1.4839308732916834e-06, + "learning_rate": 0.00018023157894736844, + "logits/chosen": 13.233963012695312, + "logits/rejected": 13.233963012695312, + "logps/chosen": -5174.9248046875, + "logps/rejected": -5174.9248046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5540771484375, + "rewards/margins": 0.0, + "rewards/rejected": -514.5540771484375, + "step": 947 + }, + { + "epoch": 9.978947368421053, + "grad_norm": 1.2255666206328897e-06, + "learning_rate": 0.00018021052631578946, + "logits/chosen": 13.172708511352539, + "logits/rejected": 13.172708511352539, + "logps/chosen": -3998.29296875, + "logps/rejected": -3998.29296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0019836425781, + "rewards/margins": 0.0, + "rewards/rejected": -397.0019836425781, + "step": 948 + }, + { + "epoch": 9.989473684210527, + "grad_norm": 1.206728256875067e-06, + "learning_rate": 0.00018018947368421054, + "logits/chosen": 13.182438850402832, + "logits/rejected": 13.182438850402832, + "logps/chosen": -3758.41015625, + "logps/rejected": -3758.41015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9226989746094, + "rewards/margins": 0.0, + "rewards/rejected": -372.9226989746094, + "step": 949 + }, + { + "epoch": 10.0, + "grad_norm": 1.5243218740579323e-06, + "learning_rate": 0.0001801684210526316, + "logits/chosen": 13.168974876403809, + "logits/rejected": 13.168974876403809, + "logps/chosen": -3998.42578125, + "logps/rejected": -3998.42578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0152587890625, + "rewards/margins": 0.0, + "rewards/rejected": -397.0152587890625, + "step": 950 + }, + { + "epoch": 10.0, + "eval_logits/chosen": 13.194429397583008, + "eval_logits/rejected": 13.194429397583008, + "eval_logps/chosen": -4310.5576171875, + "eval_logps/rejected": -4310.5576171875, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.15252685546875, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.15252685546875, + "eval_runtime": 4.5454, + "eval_samples_per_second": 2.2, + "eval_steps_per_second": 2.2, + "step": 950 + }, + { + "epoch": 10.010526315789473, + "grad_norm": 1.4693589491798775e-06, + "learning_rate": 0.00018014736842105264, + "logits/chosen": 13.159584999084473, + "logits/rejected": 13.159584999084473, + "logps/chosen": -3780.01953125, + "logps/rejected": -3780.01953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.1328430175781, + "rewards/margins": 0.0, + "rewards/rejected": -375.1328430175781, + "step": 951 + }, + { + "epoch": 10.021052631578947, + "grad_norm": 1.1617106565608992e-06, + "learning_rate": 0.0001801263157894737, + "logits/chosen": 13.150762557983398, + "logits/rejected": 13.150762557983398, + "logps/chosen": -3998.5390625, + "logps/rejected": -3998.5390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0265808105469, + "rewards/margins": 0.0, + "rewards/rejected": -397.0265808105469, + "step": 952 + }, + { + "epoch": 10.031578947368422, + "grad_norm": 1.0501075848878827e-06, + "learning_rate": 0.00018010526315789477, + "logits/chosen": 13.132506370544434, + "logits/rejected": 13.132506370544434, + "logps/chosen": -2672.892578125, + "logps/rejected": -2672.892578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5076599121094, + "rewards/margins": 0.0, + "rewards/rejected": -264.5076599121094, + "step": 953 + }, + { + "epoch": 10.042105263157895, + "grad_norm": 8.266853228633408e-07, + "learning_rate": 0.0001800842105263158, + "logits/chosen": 13.129226684570312, + "logits/rejected": 13.129226684570312, + "logps/chosen": -2967.841796875, + "logps/rejected": -2967.841796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0579833984375, + "rewards/margins": 0.0, + "rewards/rejected": -294.0579833984375, + "step": 954 + }, + { + "epoch": 10.052631578947368, + "grad_norm": 1.613939616618154e-06, + "learning_rate": 0.00018006315789473684, + "logits/chosen": 13.123612403869629, + "logits/rejected": 13.123612403869629, + "logps/chosen": -4291.8515625, + "logps/rejected": -4291.8515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.3877868652344, + "rewards/margins": 0.0, + "rewards/rejected": -426.3877868652344, + "step": 955 + }, + { + "epoch": 10.063157894736841, + "grad_norm": 1.5553962384728948e-06, + "learning_rate": 0.0001800421052631579, + "logits/chosen": 13.108367919921875, + "logits/rejected": 13.108367919921875, + "logps/chosen": -3781.0693359375, + "logps/rejected": -3781.0693359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.2378234863281, + "rewards/margins": 0.0, + "rewards/rejected": -375.2378234863281, + "step": 956 + }, + { + "epoch": 10.073684210526316, + "grad_norm": 9.385697126162995e-07, + "learning_rate": 0.00018002105263157896, + "logits/chosen": 13.097867965698242, + "logits/rejected": 13.097867965698242, + "logps/chosen": -2673.2138671875, + "logps/rejected": -2673.2138671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.539794921875, + "rewards/margins": 0.0, + "rewards/rejected": -264.539794921875, + "step": 957 + }, + { + "epoch": 10.08421052631579, + "grad_norm": 1.2404476592564606e-06, + "learning_rate": 0.00018, + "logits/chosen": 13.106371879577637, + "logits/rejected": 13.106371879577637, + "logps/chosen": -4292.078125, + "logps/rejected": -4292.078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.41046142578125, + "rewards/margins": 0.0, + "rewards/rejected": -426.41046142578125, + "step": 958 + }, + { + "epoch": 10.094736842105263, + "grad_norm": 1.864947648755333e-06, + "learning_rate": 0.00017997894736842106, + "logits/chosen": 13.161351203918457, + "logits/rejected": 13.161351203918457, + "logps/chosen": -5172.3759765625, + "logps/rejected": -5172.3759765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.2991943359375, + "rewards/margins": 0.0, + "rewards/rejected": -514.2991943359375, + "step": 959 + }, + { + "epoch": 10.105263157894736, + "grad_norm": 2.015239260799717e-06, + "learning_rate": 0.0001799578947368421, + "logits/chosen": 13.162423133850098, + "logits/rejected": 13.162423133850098, + "logps/chosen": -5172.1923828125, + "logps/rejected": -5172.1923828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.2808227539062, + "rewards/margins": 0.0, + "rewards/rejected": -514.2808227539062, + "step": 960 + }, + { + "epoch": 10.115789473684211, + "grad_norm": 1.4148392892820993e-06, + "learning_rate": 0.00017993684210526316, + "logits/chosen": 13.10560131072998, + "logits/rejected": 13.10560131072998, + "logps/chosen": -3540.525390625, + "logps/rejected": -3540.525390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.02667236328125, + "rewards/margins": 0.0, + "rewards/rejected": -351.02667236328125, + "step": 961 + }, + { + "epoch": 10.126315789473685, + "grad_norm": 1.7931035927176708e-06, + "learning_rate": 0.0001799157894736842, + "logits/chosen": 13.172406196594238, + "logits/rejected": 13.172406196594238, + "logps/chosen": -5172.849609375, + "logps/rejected": -5172.849609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3465576171875, + "rewards/margins": 0.0, + "rewards/rejected": -514.3465576171875, + "step": 962 + }, + { + "epoch": 10.136842105263158, + "grad_norm": 1.007341325021116e-06, + "learning_rate": 0.00017989473684210528, + "logits/chosen": 13.121959686279297, + "logits/rejected": 13.121959686279297, + "logps/chosen": -3541.4306640625, + "logps/rejected": -3541.4306640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1171875, + "rewards/margins": 0.0, + "rewards/rejected": -351.1171875, + "step": 963 + }, + { + "epoch": 10.147368421052631, + "grad_norm": 8.368168664674158e-07, + "learning_rate": 0.00017987368421052633, + "logits/chosen": 13.122076988220215, + "logits/rejected": 13.122076988220215, + "logps/chosen": -2674.181640625, + "logps/rejected": -2674.181640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6365661621094, + "rewards/margins": 0.0, + "rewards/rejected": -264.6365661621094, + "step": 964 + }, + { + "epoch": 10.157894736842104, + "grad_norm": 1.775458144948061e-06, + "learning_rate": 0.00017985263157894738, + "logits/chosen": 13.139272689819336, + "logits/rejected": 13.139272689819336, + "logps/chosen": -3997.744140625, + "logps/rejected": -3997.744140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9471130371094, + "rewards/margins": 0.0, + "rewards/rejected": -396.9471130371094, + "step": 965 + }, + { + "epoch": 10.16842105263158, + "grad_norm": 8.284944215120049e-07, + "learning_rate": 0.00017983157894736843, + "logits/chosen": 13.147418022155762, + "logits/rejected": 13.147418022155762, + "logps/chosen": -2968.232421875, + "logps/rejected": -2968.232421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0970458984375, + "rewards/margins": 0.0, + "rewards/rejected": -294.0970458984375, + "step": 966 + }, + { + "epoch": 10.178947368421053, + "grad_norm": 9.165895562546211e-07, + "learning_rate": 0.00017981052631578948, + "logits/chosen": 13.15627384185791, + "logits/rejected": 13.15627384185791, + "logps/chosen": -3542.287109375, + "logps/rejected": -3542.287109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2028503417969, + "rewards/margins": 0.0, + "rewards/rejected": -351.2028503417969, + "step": 967 + }, + { + "epoch": 10.189473684210526, + "grad_norm": 7.98844666860532e-07, + "learning_rate": 0.00017978947368421053, + "logits/chosen": 13.160597801208496, + "logits/rejected": 13.160597801208496, + "logps/chosen": -2968.58984375, + "logps/rejected": -2968.58984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1327819824219, + "rewards/margins": 0.0, + "rewards/rejected": -294.1327819824219, + "step": 968 + }, + { + "epoch": 10.2, + "grad_norm": 1.2824373243347509e-06, + "learning_rate": 0.00017976842105263158, + "logits/chosen": 13.165441513061523, + "logits/rejected": 13.165441513061523, + "logps/chosen": -3997.59765625, + "logps/rejected": -3997.59765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9324645996094, + "rewards/margins": 0.0, + "rewards/rejected": -396.9324645996094, + "step": 969 + }, + { + "epoch": 10.210526315789474, + "grad_norm": 1.0446786973261624e-06, + "learning_rate": 0.00017974736842105263, + "logits/chosen": 13.174192428588867, + "logits/rejected": 13.174192428588867, + "logps/chosen": -3543.2490234375, + "logps/rejected": -3543.2490234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2990417480469, + "rewards/margins": 0.0, + "rewards/rejected": -351.2990417480469, + "step": 970 + }, + { + "epoch": 10.221052631578948, + "grad_norm": 1.2605731853909674e-06, + "learning_rate": 0.0001797263157894737, + "logits/chosen": 13.172664642333984, + "logits/rejected": 13.172664642333984, + "logps/chosen": -3997.59375, + "logps/rejected": -3997.59375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.93206787109375, + "rewards/margins": 0.0, + "rewards/rejected": -396.93206787109375, + "step": 971 + }, + { + "epoch": 10.23157894736842, + "grad_norm": 1.0253037316942937e-06, + "learning_rate": 0.00017970526315789476, + "logits/chosen": 13.179628372192383, + "logits/rejected": 13.179628372192383, + "logps/chosen": -3543.8369140625, + "logps/rejected": -3543.8369140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3578186035156, + "rewards/margins": 0.0, + "rewards/rejected": -351.3578186035156, + "step": 972 + }, + { + "epoch": 10.242105263157894, + "grad_norm": 1.3200043440519948e-06, + "learning_rate": 0.00017968421052631578, + "logits/chosen": 13.17689037322998, + "logits/rejected": 13.17689037322998, + "logps/chosen": -3779.640625, + "logps/rejected": -3779.640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.0949401855469, + "rewards/margins": 0.0, + "rewards/rejected": -375.0949401855469, + "step": 973 + }, + { + "epoch": 10.25263157894737, + "grad_norm": 8.757655223234906e-07, + "learning_rate": 0.00017966315789473685, + "logits/chosen": 13.185495376586914, + "logits/rejected": 13.185495376586914, + "logps/chosen": -3544.4033203125, + "logps/rejected": -3544.4033203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4144592285156, + "rewards/margins": 0.0, + "rewards/rejected": -351.4144592285156, + "step": 974 + }, + { + "epoch": 10.263157894736842, + "grad_norm": 1.3463446748573915e-06, + "learning_rate": 0.0001796421052631579, + "logits/chosen": 13.184090614318848, + "logits/rejected": 13.184090614318848, + "logps/chosen": -3998.13671875, + "logps/rejected": -3998.13671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9863586425781, + "rewards/margins": 0.0, + "rewards/rejected": -396.9863586425781, + "step": 975 + }, + { + "epoch": 10.273684210526316, + "grad_norm": 1.8277658000442898e-06, + "learning_rate": 0.00017962105263157895, + "logits/chosen": 13.236319541931152, + "logits/rejected": 13.236319541931152, + "logps/chosen": -4876.94091796875, + "logps/rejected": -4876.94091796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.846923828125, + "rewards/margins": 0.0, + "rewards/rejected": -484.846923828125, + "step": 976 + }, + { + "epoch": 10.284210526315789, + "grad_norm": 1.055819097928179e-06, + "learning_rate": 0.0001796, + "logits/chosen": 13.196540832519531, + "logits/rejected": 13.196540832519531, + "logps/chosen": -3757.6884765625, + "logps/rejected": -3757.6884765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8505554199219, + "rewards/margins": 0.0, + "rewards/rejected": -372.8505554199219, + "step": 977 + }, + { + "epoch": 10.294736842105262, + "grad_norm": 1.4574362694474985e-06, + "learning_rate": 0.00017957894736842108, + "logits/chosen": 13.253937721252441, + "logits/rejected": 13.253937721252441, + "logps/chosen": -5175.04248046875, + "logps/rejected": -5175.04248046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5658569335938, + "rewards/margins": 0.0, + "rewards/rejected": -514.5658569335938, + "step": 978 + }, + { + "epoch": 10.305263157894737, + "grad_norm": 1.3784009524897556e-06, + "learning_rate": 0.00017955789473684213, + "logits/chosen": 13.242537498474121, + "logits/rejected": 13.242537498474121, + "logps/chosen": -4877.05078125, + "logps/rejected": -4877.05078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.85791015625, + "rewards/margins": 0.0, + "rewards/rejected": -484.85791015625, + "step": 979 + }, + { + "epoch": 10.31578947368421, + "grad_norm": 1.3029579122303403e-06, + "learning_rate": 0.00017953684210526315, + "logits/chosen": 13.20040512084961, + "logits/rejected": 13.20040512084961, + "logps/chosen": -4289.892578125, + "logps/rejected": -4289.892578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.19189453125, + "rewards/margins": 0.0, + "rewards/rejected": -426.19189453125, + "step": 980 + }, + { + "epoch": 10.326315789473684, + "grad_norm": 1.6156998299265979e-06, + "learning_rate": 0.00017951578947368423, + "logits/chosen": 13.245430946350098, + "logits/rejected": 13.245430946350098, + "logps/chosen": -4877.677734375, + "logps/rejected": -4877.677734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9206237792969, + "rewards/margins": 0.0, + "rewards/rejected": -484.9206237792969, + "step": 981 + }, + { + "epoch": 10.336842105263157, + "grad_norm": 1.3154404996384983e-06, + "learning_rate": 0.00017949473684210528, + "logits/chosen": 13.232123374938965, + "logits/rejected": 13.232123374938965, + "logps/chosen": -4326.23046875, + "logps/rejected": -4326.23046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4382019042969, + "rewards/margins": 0.0, + "rewards/rejected": -429.4382019042969, + "step": 982 + }, + { + "epoch": 10.347368421052632, + "grad_norm": 1.483234314036963e-06, + "learning_rate": 0.00017947368421052632, + "logits/chosen": 13.189722061157227, + "logits/rejected": 13.189722061157227, + "logps/chosen": -3998.69140625, + "logps/rejected": -3998.69140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0418395996094, + "rewards/margins": 0.0, + "rewards/rejected": -397.0418395996094, + "step": 983 + }, + { + "epoch": 10.357894736842105, + "grad_norm": 1.1578424619074212e-06, + "learning_rate": 0.00017945263157894737, + "logits/chosen": 13.174674987792969, + "logits/rejected": 13.174674987792969, + "logps/chosen": -2673.5751953125, + "logps/rejected": -2673.5751953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.575927734375, + "rewards/margins": 0.0, + "rewards/rejected": -264.575927734375, + "step": 984 + }, + { + "epoch": 10.368421052631579, + "grad_norm": 9.803466127777938e-07, + "learning_rate": 0.00017943157894736845, + "logits/chosen": 13.18294906616211, + "logits/rejected": 13.18294906616211, + "logps/chosen": -3758.1171875, + "logps/rejected": -3758.1171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8934020996094, + "rewards/margins": 0.0, + "rewards/rejected": -372.8934020996094, + "step": 985 + }, + { + "epoch": 10.378947368421052, + "grad_norm": 1.6069811863417272e-06, + "learning_rate": 0.00017941052631578947, + "logits/chosen": 13.230050086975098, + "logits/rejected": 13.230050086975098, + "logps/chosen": -5175.53955078125, + "logps/rejected": -5175.53955078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6155395507812, + "rewards/margins": 0.0, + "rewards/rejected": -514.6155395507812, + "step": 986 + }, + { + "epoch": 10.389473684210527, + "grad_norm": 1.5931537973301602e-06, + "learning_rate": 0.00017938947368421052, + "logits/chosen": 13.160172462463379, + "logits/rejected": 13.160172462463379, + "logps/chosen": -3778.580078125, + "logps/rejected": -3778.580078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9888916015625, + "rewards/margins": 0.0, + "rewards/rejected": -374.9888916015625, + "step": 987 + }, + { + "epoch": 10.4, + "grad_norm": 1.9772703581111273e-06, + "learning_rate": 0.0001793684210526316, + "logits/chosen": 13.211435317993164, + "logits/rejected": 13.211435317993164, + "logps/chosen": -4879.33349609375, + "logps/rejected": -4879.33349609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.086181640625, + "rewards/margins": 0.0, + "rewards/rejected": -485.086181640625, + "step": 988 + }, + { + "epoch": 10.410526315789474, + "grad_norm": 1.1682045624183957e-06, + "learning_rate": 0.00017934736842105265, + "logits/chosen": 13.163681983947754, + "logits/rejected": 13.163681983947754, + "logps/chosen": -3999.25, + "logps/rejected": -3999.25, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0976867675781, + "rewards/margins": 0.0, + "rewards/rejected": -397.0976867675781, + "step": 989 + }, + { + "epoch": 10.421052631578947, + "grad_norm": 1.691668558123638e-06, + "learning_rate": 0.0001793263157894737, + "logits/chosen": 13.231330871582031, + "logits/rejected": 13.231330871582031, + "logps/chosen": -5175.73779296875, + "logps/rejected": -5175.73779296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6353759765625, + "rewards/margins": 0.0, + "rewards/rejected": -514.6353759765625, + "step": 990 + }, + { + "epoch": 10.431578947368422, + "grad_norm": 1.5367602372862166e-06, + "learning_rate": 0.00017930526315789475, + "logits/chosen": 13.220803260803223, + "logits/rejected": 13.220803260803223, + "logps/chosen": -4880.1689453125, + "logps/rejected": -4880.1689453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.16973876953125, + "rewards/margins": 0.0, + "rewards/rejected": -485.16973876953125, + "step": 991 + }, + { + "epoch": 10.442105263157895, + "grad_norm": 1.2719325468424358e-06, + "learning_rate": 0.0001792842105263158, + "logits/chosen": 13.18104362487793, + "logits/rejected": 13.18104362487793, + "logps/chosen": -3758.6015625, + "logps/rejected": -3758.6015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9418640136719, + "rewards/margins": 0.0, + "rewards/rejected": -372.9418640136719, + "step": 992 + }, + { + "epoch": 10.452631578947368, + "grad_norm": 1.4966909702707198e-06, + "learning_rate": 0.00017926315789473684, + "logits/chosen": 13.178886413574219, + "logits/rejected": 13.178886413574219, + "logps/chosen": -4288.48583984375, + "logps/rejected": -4288.48583984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0512390136719, + "rewards/margins": 0.0, + "rewards/rejected": -426.0512390136719, + "step": 993 + }, + { + "epoch": 10.463157894736842, + "grad_norm": 1.3262939546621055e-06, + "learning_rate": 0.0001792421052631579, + "logits/chosen": 13.222832679748535, + "logits/rejected": 13.222832679748535, + "logps/chosen": -4880.97265625, + "logps/rejected": -4880.97265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2500915527344, + "rewards/margins": 0.0, + "rewards/rejected": -485.2500915527344, + "step": 994 + }, + { + "epoch": 10.473684210526315, + "grad_norm": 1.1435511169111123e-06, + "learning_rate": 0.00017922105263157897, + "logits/chosen": 13.222113609313965, + "logits/rejected": 13.222113609313965, + "logps/chosen": -4881.46630859375, + "logps/rejected": -4881.46630859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2994689941406, + "rewards/margins": 0.0, + "rewards/rejected": -485.2994689941406, + "step": 995 + }, + { + "epoch": 10.48421052631579, + "grad_norm": 1.5773370023453026e-06, + "learning_rate": 0.00017920000000000002, + "logits/chosen": 13.235569953918457, + "logits/rejected": 13.235569953918457, + "logps/chosen": -5175.8994140625, + "logps/rejected": -5175.8994140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6515502929688, + "rewards/margins": 0.0, + "rewards/rejected": -514.6515502929688, + "step": 996 + }, + { + "epoch": 10.494736842105263, + "grad_norm": 1.402113412041217e-06, + "learning_rate": 0.00017917894736842107, + "logits/chosen": 13.211935043334961, + "logits/rejected": 13.211935043334961, + "logps/chosen": -4325.302734375, + "logps/rejected": -4325.302734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.3454284667969, + "rewards/margins": 0.0, + "rewards/rejected": -429.3454284667969, + "step": 997 + }, + { + "epoch": 10.505263157894737, + "grad_norm": 1.350139541500539e-06, + "learning_rate": 0.00017915789473684212, + "logits/chosen": 13.21486759185791, + "logits/rejected": 13.21486759185791, + "logps/chosen": -4325.42578125, + "logps/rejected": -4325.42578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.35772705078125, + "rewards/margins": 0.0, + "rewards/rejected": -429.35772705078125, + "step": 998 + }, + { + "epoch": 10.51578947368421, + "grad_norm": 1.136086893893662e-06, + "learning_rate": 0.00017913684210526317, + "logits/chosen": 13.187580108642578, + "logits/rejected": 13.187580108642578, + "logps/chosen": -3759.060546875, + "logps/rejected": -3759.060546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9877624511719, + "rewards/margins": 0.0, + "rewards/rejected": -372.9877624511719, + "step": 999 + }, + { + "epoch": 10.526315789473685, + "grad_norm": 1.4942453390176524e-06, + "learning_rate": 0.00017911578947368422, + "logits/chosen": 13.178108215332031, + "logits/rejected": 13.178108215332031, + "logps/chosen": -3998.203125, + "logps/rejected": -3998.203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9930114746094, + "rewards/margins": 0.0, + "rewards/rejected": -396.9930114746094, + "step": 1000 + }, + { + "epoch": 10.526315789473685, + "eval_logits/chosen": 13.210248947143555, + "eval_logits/rejected": 13.210248947143555, + "eval_logps/chosen": -4310.80810546875, + "eval_logps/rejected": -4310.80810546875, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.17767333984375, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.17767333984375, + "eval_runtime": 4.3934, + "eval_samples_per_second": 2.276, + "eval_steps_per_second": 2.276, + "step": 1000 + }, + { + "epoch": 10.536842105263158, + "grad_norm": 1.3362285926632467e-06, + "learning_rate": 0.00017909473684210527, + "logits/chosen": 13.228311538696289, + "logits/rejected": 13.228311538696289, + "logps/chosen": -4882.1826171875, + "logps/rejected": -4882.1826171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.37109375, + "rewards/margins": 0.0, + "rewards/rejected": -485.37109375, + "step": 1001 + }, + { + "epoch": 10.547368421052632, + "grad_norm": 1.278688728234556e-06, + "learning_rate": 0.00017907368421052631, + "logits/chosen": 13.169454574584961, + "logits/rejected": 13.169454574584961, + "logps/chosen": -3998.1796875, + "logps/rejected": -3998.1796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.99066162109375, + "rewards/margins": 0.0, + "rewards/rejected": -396.99066162109375, + "step": 1002 + }, + { + "epoch": 10.557894736842105, + "grad_norm": 1.2729005902656354e-06, + "learning_rate": 0.0001790526315789474, + "logits/chosen": 13.21623420715332, + "logits/rejected": 13.21623420715332, + "logps/chosen": -4882.1455078125, + "logps/rejected": -4882.1455078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.3674011230469, + "rewards/margins": 0.0, + "rewards/rejected": -485.3674011230469, + "step": 1003 + }, + { + "epoch": 10.568421052631578, + "grad_norm": 1.3228390116637456e-06, + "learning_rate": 0.00017903157894736844, + "logits/chosen": 13.209847450256348, + "logits/rejected": 13.209847450256348, + "logps/chosen": -4882.48486328125, + "logps/rejected": -4882.48486328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.4013366699219, + "rewards/margins": 0.0, + "rewards/rejected": -485.4013366699219, + "step": 1004 + }, + { + "epoch": 10.578947368421053, + "grad_norm": 1.2972160448043724e-06, + "learning_rate": 0.00017901052631578946, + "logits/chosen": 13.191460609436035, + "logits/rejected": 13.191460609436035, + "logps/chosen": -4326.072265625, + "logps/rejected": -4326.072265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.42236328125, + "rewards/margins": 0.0, + "rewards/rejected": -429.42236328125, + "step": 1005 + }, + { + "epoch": 10.589473684210526, + "grad_norm": 1.2427194633346517e-06, + "learning_rate": 0.00017898947368421054, + "logits/chosen": 13.139479637145996, + "logits/rejected": 13.139479637145996, + "logps/chosen": -2671.33984375, + "logps/rejected": -2671.33984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.3523864746094, + "rewards/margins": 0.0, + "rewards/rejected": -264.3523864746094, + "step": 1006 + }, + { + "epoch": 10.6, + "grad_norm": 1.1622055353655014e-06, + "learning_rate": 0.0001789684210526316, + "logits/chosen": 13.137301445007324, + "logits/rejected": 13.137301445007324, + "logps/chosen": -2671.369140625, + "logps/rejected": -2671.369140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.3553161621094, + "rewards/margins": 0.0, + "rewards/rejected": -264.3553161621094, + "step": 1007 + }, + { + "epoch": 10.610526315789473, + "grad_norm": 1.1334084319969406e-06, + "learning_rate": 0.00017894736842105264, + "logits/chosen": 13.14943790435791, + "logits/rejected": 13.14943790435791, + "logps/chosen": -3540.439453125, + "logps/rejected": -3540.439453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.01806640625, + "rewards/margins": 0.0, + "rewards/rejected": -351.01806640625, + "step": 1008 + }, + { + "epoch": 10.621052631578948, + "grad_norm": 1.0162290209336788e-06, + "learning_rate": 0.0001789263157894737, + "logits/chosen": 13.152040481567383, + "logits/rejected": 13.152040481567383, + "logps/chosen": -3540.490234375, + "logps/rejected": -3540.490234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.0231628417969, + "rewards/margins": 0.0, + "rewards/rejected": -351.0231628417969, + "step": 1009 + }, + { + "epoch": 10.631578947368421, + "grad_norm": 1.959740302481805e-06, + "learning_rate": 0.00017890526315789476, + "logits/chosen": 13.152101516723633, + "logits/rejected": 13.152101516723633, + "logps/chosen": -3998.369140625, + "logps/rejected": -3998.369140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0096130371094, + "rewards/margins": 0.0, + "rewards/rejected": -397.0096130371094, + "step": 1010 + }, + { + "epoch": 10.642105263157895, + "grad_norm": 1.0505898444534978e-06, + "learning_rate": 0.00017888421052631579, + "logits/chosen": 13.162419319152832, + "logits/rejected": 13.162419319152832, + "logps/chosen": -3540.927734375, + "logps/rejected": -3540.927734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.06689453125, + "rewards/margins": 0.0, + "rewards/rejected": -351.06689453125, + "step": 1011 + }, + { + "epoch": 10.652631578947368, + "grad_norm": 2.120922317772056e-06, + "learning_rate": 0.00017886315789473683, + "logits/chosen": 13.204904556274414, + "logits/rejected": 13.204904556274414, + "logps/chosen": -4326.396484375, + "logps/rejected": -4326.396484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4548034667969, + "rewards/margins": 0.0, + "rewards/rejected": -429.4548034667969, + "step": 1012 + }, + { + "epoch": 10.663157894736843, + "grad_norm": 1.5682876437494997e-06, + "learning_rate": 0.0001788421052631579, + "logits/chosen": 13.172957420349121, + "logits/rejected": 13.172957420349121, + "logps/chosen": -3776.220703125, + "logps/rejected": -3776.220703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7529602050781, + "rewards/margins": 0.0, + "rewards/rejected": -374.7529602050781, + "step": 1013 + }, + { + "epoch": 10.673684210526316, + "grad_norm": 2.4487833343300736e-06, + "learning_rate": 0.00017882105263157896, + "logits/chosen": 13.251319885253906, + "logits/rejected": 13.251319885253906, + "logps/chosen": -5175.296875, + "logps/rejected": -5175.296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5912475585938, + "rewards/margins": 0.0, + "rewards/rejected": -514.5912475585938, + "step": 1014 + }, + { + "epoch": 10.68421052631579, + "grad_norm": 1.9198505469830707e-06, + "learning_rate": 0.0001788, + "logits/chosen": 13.196731567382812, + "logits/rejected": 13.196731567382812, + "logps/chosen": -3542.06640625, + "logps/rejected": -3542.06640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1807556152344, + "rewards/margins": 0.0, + "rewards/rejected": -351.1807556152344, + "step": 1015 + }, + { + "epoch": 10.694736842105263, + "grad_norm": 1.5869774188104202e-06, + "learning_rate": 0.00017877894736842106, + "logits/chosen": 13.27206039428711, + "logits/rejected": 13.27206039428711, + "logps/chosen": -5175.7421875, + "logps/rejected": -5175.7421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6358032226562, + "rewards/margins": 0.0, + "rewards/rejected": -514.6358032226562, + "step": 1016 + }, + { + "epoch": 10.705263157894738, + "grad_norm": 9.356546684102796e-07, + "learning_rate": 0.00017875789473684213, + "logits/chosen": 13.20598316192627, + "logits/rejected": 13.20598316192627, + "logps/chosen": -2673.041015625, + "logps/rejected": -2673.041015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.52252197265625, + "rewards/margins": 0.0, + "rewards/rejected": -264.52252197265625, + "step": 1017 + }, + { + "epoch": 10.715789473684211, + "grad_norm": 2.2187205104273744e-06, + "learning_rate": 0.00017873684210526316, + "logits/chosen": 13.23035717010498, + "logits/rejected": 13.23035717010498, + "logps/chosen": -4286.69921875, + "logps/rejected": -4286.69921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.87255859375, + "rewards/margins": 0.0, + "rewards/rejected": -425.87255859375, + "step": 1018 + }, + { + "epoch": 10.726315789473684, + "grad_norm": 8.783920861787919e-07, + "learning_rate": 0.0001787157894736842, + "logits/chosen": 13.237408638000488, + "logits/rejected": 13.237408638000488, + "logps/chosen": -3543.453125, + "logps/rejected": -3543.453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3194274902344, + "rewards/margins": 0.0, + "rewards/rejected": -351.3194274902344, + "step": 1019 + }, + { + "epoch": 10.736842105263158, + "grad_norm": 1.9901049199688714e-06, + "learning_rate": 0.00017869473684210528, + "logits/chosen": 13.248115539550781, + "logits/rejected": 13.248115539550781, + "logps/chosen": -4287.0732421875, + "logps/rejected": -4287.0732421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.90997314453125, + "rewards/margins": 0.0, + "rewards/rejected": -425.90997314453125, + "step": 1020 + }, + { + "epoch": 10.74736842105263, + "grad_norm": 8.802184652267897e-07, + "learning_rate": 0.00017867368421052633, + "logits/chosen": 13.25467586517334, + "logits/rejected": 13.25467586517334, + "logps/chosen": -3544.111328125, + "logps/rejected": -3544.111328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.38525390625, + "rewards/margins": 0.0, + "rewards/rejected": -351.38525390625, + "step": 1021 + }, + { + "epoch": 10.757894736842106, + "grad_norm": 1.355450990558893e-06, + "learning_rate": 0.00017865263157894738, + "logits/chosen": 13.256010055541992, + "logits/rejected": 13.256010055541992, + "logps/chosen": -3997.22265625, + "logps/rejected": -3997.22265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.89495849609375, + "rewards/margins": 0.0, + "rewards/rejected": -396.89495849609375, + "step": 1022 + }, + { + "epoch": 10.76842105263158, + "grad_norm": 1.2228549621795537e-06, + "learning_rate": 0.00017863157894736843, + "logits/chosen": 13.251581192016602, + "logits/rejected": 13.251581192016602, + "logps/chosen": -2673.673828125, + "logps/rejected": -2673.673828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5857849121094, + "rewards/margins": 0.0, + "rewards/rejected": -264.5857849121094, + "step": 1023 + }, + { + "epoch": 10.778947368421052, + "grad_norm": 1.1635554528766079e-06, + "learning_rate": 0.00017861052631578948, + "logits/chosen": 13.2525053024292, + "logits/rejected": 13.2525053024292, + "logps/chosen": -2673.8095703125, + "logps/rejected": -2673.8095703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.599365234375, + "rewards/margins": 0.0, + "rewards/rejected": -264.599365234375, + "step": 1024 + }, + { + "epoch": 10.789473684210526, + "grad_norm": 1.3122258906150819e-06, + "learning_rate": 0.00017858947368421053, + "logits/chosen": 13.258501052856445, + "logits/rejected": 13.258501052856445, + "logps/chosen": -3997.408203125, + "logps/rejected": -3997.408203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.91351318359375, + "rewards/margins": 0.0, + "rewards/rejected": -396.91351318359375, + "step": 1025 + }, + { + "epoch": 10.8, + "grad_norm": 1.368787934552529e-06, + "learning_rate": 0.00017856842105263158, + "logits/chosen": 13.252470016479492, + "logits/rejected": 13.252470016479492, + "logps/chosen": -3997.669921875, + "logps/rejected": -3997.669921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9396667480469, + "rewards/margins": 0.0, + "rewards/rejected": -396.9396667480469, + "step": 1026 + }, + { + "epoch": 10.810526315789474, + "grad_norm": 2.3092115952749737e-06, + "learning_rate": 0.00017854736842105263, + "logits/chosen": 13.298238754272461, + "logits/rejected": 13.298238754272461, + "logps/chosen": -4878.5146484375, + "logps/rejected": -4878.5146484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0043029785156, + "rewards/margins": 0.0, + "rewards/rejected": -485.0043029785156, + "step": 1027 + }, + { + "epoch": 10.821052631578947, + "grad_norm": 8.208472763726604e-07, + "learning_rate": 0.0001785263157894737, + "logits/chosen": 13.237881660461426, + "logits/rejected": 13.237881660461426, + "logps/chosen": -2968.15625, + "logps/rejected": -2968.15625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0894470214844, + "rewards/margins": 0.0, + "rewards/rejected": -294.0894470214844, + "step": 1028 + }, + { + "epoch": 10.83157894736842, + "grad_norm": 1.2605588608494145e-06, + "learning_rate": 0.00017850526315789475, + "logits/chosen": 13.226240158081055, + "logits/rejected": 13.226240158081055, + "logps/chosen": -3998.015625, + "logps/rejected": -3998.015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9742431640625, + "rewards/margins": 0.0, + "rewards/rejected": -396.9742431640625, + "step": 1029 + }, + { + "epoch": 10.842105263157894, + "grad_norm": 1.4274274917625007e-06, + "learning_rate": 0.00017848421052631578, + "logits/chosen": 13.271389961242676, + "logits/rejected": 13.271389961242676, + "logps/chosen": -4878.86328125, + "logps/rejected": -4878.86328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0391540527344, + "rewards/margins": 0.0, + "rewards/rejected": -485.0391540527344, + "step": 1030 + }, + { + "epoch": 10.852631578947369, + "grad_norm": 1.3848475646227598e-06, + "learning_rate": 0.00017846315789473685, + "logits/chosen": 13.212285041809082, + "logits/rejected": 13.212285041809082, + "logps/chosen": -3544.8076171875, + "logps/rejected": -3544.8076171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.45489501953125, + "rewards/margins": 0.0, + "rewards/rejected": -351.45489501953125, + "step": 1031 + }, + { + "epoch": 10.863157894736842, + "grad_norm": 1.1466543128335616e-06, + "learning_rate": 0.0001784421052631579, + "logits/chosen": 13.208029747009277, + "logits/rejected": 13.208029747009277, + "logps/chosen": -3757.9775390625, + "logps/rejected": -3757.9775390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.87945556640625, + "rewards/margins": 0.0, + "rewards/rejected": -372.87945556640625, + "step": 1032 + }, + { + "epoch": 10.873684210526315, + "grad_norm": 9.515599685983034e-07, + "learning_rate": 0.00017842105263157895, + "logits/chosen": 13.19658088684082, + "logits/rejected": 13.19658088684082, + "logps/chosen": -3544.865234375, + "logps/rejected": -3544.865234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4606628417969, + "rewards/margins": 0.0, + "rewards/rejected": -351.4606628417969, + "step": 1033 + }, + { + "epoch": 10.884210526315789, + "grad_norm": 7.903745995463396e-07, + "learning_rate": 0.0001784, + "logits/chosen": 13.18742561340332, + "logits/rejected": 13.18742561340332, + "logps/chosen": -2968.53125, + "logps/rejected": -2968.53125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1269226074219, + "rewards/margins": 0.0, + "rewards/rejected": -294.1269226074219, + "step": 1034 + }, + { + "epoch": 10.894736842105264, + "grad_norm": 8.046241646297858e-07, + "learning_rate": 0.00017837894736842108, + "logits/chosen": 13.171792030334473, + "logits/rejected": 13.171792030334473, + "logps/chosen": -2675.59765625, + "logps/rejected": -2675.59765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.7781677246094, + "rewards/margins": 0.0, + "rewards/rejected": -264.7781677246094, + "step": 1035 + }, + { + "epoch": 10.905263157894737, + "grad_norm": 2.012414597629686e-06, + "learning_rate": 0.00017835789473684213, + "logits/chosen": 13.174912452697754, + "logits/rejected": 13.174912452697754, + "logps/chosen": -3777.1904296875, + "logps/rejected": -3777.1904296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8499450683594, + "rewards/margins": 0.0, + "rewards/rejected": -374.8499450683594, + "step": 1036 + }, + { + "epoch": 10.91578947368421, + "grad_norm": 1.4943033193048905e-06, + "learning_rate": 0.00017833684210526315, + "logits/chosen": 13.182901382446289, + "logits/rejected": 13.182901382446289, + "logps/chosen": -3758.171875, + "logps/rejected": -3758.171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8988952636719, + "rewards/margins": 0.0, + "rewards/rejected": -372.8988952636719, + "step": 1037 + }, + { + "epoch": 10.926315789473684, + "grad_norm": 1.9015815269085579e-06, + "learning_rate": 0.00017831578947368422, + "logits/chosen": 13.239189147949219, + "logits/rejected": 13.239189147949219, + "logps/chosen": -5174.32470703125, + "logps/rejected": -5174.32470703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4940795898438, + "rewards/margins": 0.0, + "rewards/rejected": -514.4940795898438, + "step": 1038 + }, + { + "epoch": 10.936842105263159, + "grad_norm": 1.4353944379763561e-06, + "learning_rate": 0.00017829473684210527, + "logits/chosen": 13.228001594543457, + "logits/rejected": 13.228001594543457, + "logps/chosen": -4878.841796875, + "logps/rejected": -4878.841796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0370178222656, + "rewards/margins": 0.0, + "rewards/rejected": -485.0370178222656, + "step": 1039 + }, + { + "epoch": 10.947368421052632, + "grad_norm": 1.4171000657370314e-06, + "learning_rate": 0.00017827368421052632, + "logits/chosen": 13.177600860595703, + "logits/rejected": 13.177600860595703, + "logps/chosen": -2968.658203125, + "logps/rejected": -2968.658203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1396179199219, + "rewards/margins": 0.0, + "rewards/rejected": -294.1396179199219, + "step": 1040 + }, + { + "epoch": 10.957894736842105, + "grad_norm": 3.215348897356307e-06, + "learning_rate": 0.00017825263157894737, + "logits/chosen": 13.24644947052002, + "logits/rejected": 13.24644947052002, + "logps/chosen": -5174.0419921875, + "logps/rejected": -5174.0419921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4657592773438, + "rewards/margins": 0.0, + "rewards/rejected": -514.4657592773438, + "step": 1041 + }, + { + "epoch": 10.968421052631578, + "grad_norm": 1.5377390809589997e-06, + "learning_rate": 0.00017823157894736845, + "logits/chosen": 13.180655479431152, + "logits/rejected": 13.180655479431152, + "logps/chosen": -3778.388671875, + "logps/rejected": -3778.388671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9697570800781, + "rewards/margins": 0.0, + "rewards/rejected": -374.9697570800781, + "step": 1042 + }, + { + "epoch": 10.978947368421053, + "grad_norm": 2.6887482817983255e-06, + "learning_rate": 0.00017821052631578947, + "logits/chosen": 13.249733924865723, + "logits/rejected": 13.249733924865723, + "logps/chosen": -5174.35498046875, + "logps/rejected": -5174.35498046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4970703125, + "rewards/margins": 0.0, + "rewards/rejected": -514.4970703125, + "step": 1043 + }, + { + "epoch": 10.989473684210527, + "grad_norm": 1.5689623751313775e-06, + "learning_rate": 0.00017818947368421052, + "logits/chosen": 13.185819625854492, + "logits/rejected": 13.185819625854492, + "logps/chosen": -3998.734375, + "logps/rejected": -3998.734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0461120605469, + "rewards/margins": 0.0, + "rewards/rejected": -397.0461120605469, + "step": 1044 + }, + { + "epoch": 11.0, + "grad_norm": 2.098635377478786e-06, + "learning_rate": 0.0001781684210526316, + "logits/chosen": 13.242274284362793, + "logits/rejected": 13.242274284362793, + "logps/chosen": -4879.4326171875, + "logps/rejected": -4879.4326171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0960998535156, + "rewards/margins": 0.0, + "rewards/rejected": -485.0960998535156, + "step": 1045 + }, + { + "epoch": 11.010526315789473, + "grad_norm": 1.1269537480984582e-06, + "learning_rate": 0.00017814736842105264, + "logits/chosen": 13.24583911895752, + "logits/rejected": 13.24583911895752, + "logps/chosen": -4879.86962890625, + "logps/rejected": -4879.86962890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1398010253906, + "rewards/margins": 0.0, + "rewards/rejected": -485.1398010253906, + "step": 1046 + }, + { + "epoch": 11.021052631578947, + "grad_norm": 2.0678728560596937e-06, + "learning_rate": 0.0001781263157894737, + "logits/chosen": 13.201807022094727, + "logits/rejected": 13.201807022094727, + "logps/chosen": -4288.85888671875, + "logps/rejected": -4288.85888671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0885314941406, + "rewards/margins": 0.0, + "rewards/rejected": -426.0885314941406, + "step": 1047 + }, + { + "epoch": 11.031578947368422, + "grad_norm": 2.1118066797498614e-06, + "learning_rate": 0.00017810526315789474, + "logits/chosen": 13.192779541015625, + "logits/rejected": 13.192779541015625, + "logps/chosen": -3998.36328125, + "logps/rejected": -3998.36328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0090026855469, + "rewards/margins": 0.0, + "rewards/rejected": -397.0090026855469, + "step": 1048 + }, + { + "epoch": 11.042105263157895, + "grad_norm": 1.1340811170157394e-06, + "learning_rate": 0.00017808421052631582, + "logits/chosen": 13.19736385345459, + "logits/rejected": 13.19736385345459, + "logps/chosen": -3758.8935546875, + "logps/rejected": -3758.8935546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9710388183594, + "rewards/margins": 0.0, + "rewards/rejected": -372.9710388183594, + "step": 1049 + }, + { + "epoch": 11.052631578947368, + "grad_norm": 1.226817744282016e-06, + "learning_rate": 0.00017806315789473684, + "logits/chosen": 13.17660140991211, + "logits/rejected": 13.17660140991211, + "logps/chosen": -3998.458984375, + "logps/rejected": -3998.458984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0185852050781, + "rewards/margins": 0.0, + "rewards/rejected": -397.0185852050781, + "step": 1050 + }, + { + "epoch": 11.052631578947368, + "eval_logits/chosen": 13.202532768249512, + "eval_logits/rejected": 13.202532768249512, + "eval_logps/chosen": -4310.796875, + "eval_logps/rejected": -4310.796875, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.17645263671875, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.17645263671875, + "eval_runtime": 4.5487, + "eval_samples_per_second": 2.198, + "eval_steps_per_second": 2.198, + "step": 1050 + }, + { + "epoch": 11.063157894736841, + "grad_norm": 8.642288662485953e-07, + "learning_rate": 0.0001780421052631579, + "logits/chosen": 13.158774375915527, + "logits/rejected": 13.158774375915527, + "logps/chosen": -2673.4619140625, + "logps/rejected": -2673.4619140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5646057128906, + "rewards/margins": 0.0, + "rewards/rejected": -264.5646057128906, + "step": 1051 + }, + { + "epoch": 11.073684210526316, + "grad_norm": 2.489583721398958e-06, + "learning_rate": 0.00017802105263157897, + "logits/chosen": 13.1621675491333, + "logits/rejected": 13.1621675491333, + "logps/chosen": -4288.85107421875, + "logps/rejected": -4288.85107421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0877380371094, + "rewards/margins": 0.0, + "rewards/rejected": -426.0877380371094, + "step": 1052 + }, + { + "epoch": 11.08421052631579, + "grad_norm": 2.912012178057921e-06, + "learning_rate": 0.00017800000000000002, + "logits/chosen": 13.20124626159668, + "logits/rejected": 13.20124626159668, + "logps/chosen": -4880.75, + "logps/rejected": -4880.75, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.22784423828125, + "rewards/margins": 0.0, + "rewards/rejected": -485.22784423828125, + "step": 1053 + }, + { + "epoch": 11.094736842105263, + "grad_norm": 1.1341369372530608e-06, + "learning_rate": 0.00017797894736842107, + "logits/chosen": 13.19956111907959, + "logits/rejected": 13.19956111907959, + "logps/chosen": -4881.3955078125, + "logps/rejected": -4881.3955078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2923889160156, + "rewards/margins": 0.0, + "rewards/rejected": -485.2923889160156, + "step": 1054 + }, + { + "epoch": 11.105263157894736, + "grad_norm": 2.466234946041368e-06, + "learning_rate": 0.00017795789473684212, + "logits/chosen": 13.140680313110352, + "logits/rejected": 13.140680313110352, + "logps/chosen": -3999.33203125, + "logps/rejected": -3999.33203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.10589599609375, + "rewards/margins": 0.0, + "rewards/rejected": -397.10589599609375, + "step": 1055 + }, + { + "epoch": 11.115789473684211, + "grad_norm": 2.3248396701092133e-06, + "learning_rate": 0.00017793684210526316, + "logits/chosen": 13.14515495300293, + "logits/rejected": 13.14515495300293, + "logps/chosen": -3758.888671875, + "logps/rejected": -3758.888671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9705505371094, + "rewards/margins": 0.0, + "rewards/rejected": -372.9705505371094, + "step": 1056 + }, + { + "epoch": 11.126315789473685, + "grad_norm": 1.3584361795437871e-06, + "learning_rate": 0.00017791578947368421, + "logits/chosen": 13.119670867919922, + "logits/rejected": 13.119670867919922, + "logps/chosen": -2673.177734375, + "logps/rejected": -2673.177734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.53619384765625, + "rewards/margins": 0.0, + "rewards/rejected": -264.53619384765625, + "step": 1057 + }, + { + "epoch": 11.136842105263158, + "grad_norm": 8.700322950971895e-07, + "learning_rate": 0.00017789473684210526, + "logits/chosen": 13.124737739562988, + "logits/rejected": 13.124737739562988, + "logps/chosen": -2967.685546875, + "logps/rejected": -2967.685546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0423583984375, + "rewards/margins": 0.0, + "rewards/rejected": -294.0423583984375, + "step": 1058 + }, + { + "epoch": 11.147368421052631, + "grad_norm": 1.8731444697550614e-06, + "learning_rate": 0.0001778736842105263, + "logits/chosen": 13.113309860229492, + "logits/rejected": 13.113309860229492, + "logps/chosen": -4000.09375, + "logps/rejected": -4000.09375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.18206787109375, + "rewards/margins": 0.0, + "rewards/rejected": -397.18206787109375, + "step": 1059 + }, + { + "epoch": 11.157894736842104, + "grad_norm": 2.1676491996913683e-06, + "learning_rate": 0.0001778526315789474, + "logits/chosen": 13.106008529663086, + "logits/rejected": 13.106008529663086, + "logps/chosen": -3778.25, + "logps/rejected": -3778.25, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9559020996094, + "rewards/margins": 0.0, + "rewards/rejected": -374.9559020996094, + "step": 1060 + }, + { + "epoch": 11.16842105263158, + "grad_norm": 1.620020839254721e-06, + "learning_rate": 0.00017783157894736844, + "logits/chosen": 13.110352516174316, + "logits/rejected": 13.110352516174316, + "logps/chosen": -3759.298828125, + "logps/rejected": -3759.298828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -373.0115661621094, + "rewards/margins": 0.0, + "rewards/rejected": -373.0115661621094, + "step": 1061 + }, + { + "epoch": 11.178947368421053, + "grad_norm": 8.484549880449777e-07, + "learning_rate": 0.00017781052631578946, + "logits/chosen": 13.100408554077148, + "logits/rejected": 13.100408554077148, + "logps/chosen": -2967.64453125, + "logps/rejected": -2967.64453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.03826904296875, + "rewards/margins": 0.0, + "rewards/rejected": -294.03826904296875, + "step": 1062 + }, + { + "epoch": 11.189473684210526, + "grad_norm": 1.1153858849866083e-06, + "learning_rate": 0.00017778947368421054, + "logits/chosen": 13.091215133666992, + "logits/rejected": 13.091215133666992, + "logps/chosen": -4000.166015625, + "logps/rejected": -4000.166015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1893005371094, + "rewards/margins": 0.0, + "rewards/rejected": -397.1893005371094, + "step": 1063 + }, + { + "epoch": 11.2, + "grad_norm": 2.8591518912435276e-06, + "learning_rate": 0.00017776842105263159, + "logits/chosen": 13.15332317352295, + "logits/rejected": 13.15332317352295, + "logps/chosen": -5172.298828125, + "logps/rejected": -5172.298828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.2914428710938, + "rewards/margins": 0.0, + "rewards/rejected": -514.2914428710938, + "step": 1064 + }, + { + "epoch": 11.210526315789474, + "grad_norm": 1.5865151681282441e-06, + "learning_rate": 0.00017774736842105264, + "logits/chosen": 13.13739013671875, + "logits/rejected": 13.13739013671875, + "logps/chosen": -4881.796875, + "logps/rejected": -4881.796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.33251953125, + "rewards/margins": 0.0, + "rewards/rejected": -485.33251953125, + "step": 1065 + }, + { + "epoch": 11.221052631578948, + "grad_norm": 1.0422529612696962e-06, + "learning_rate": 0.00017772631578947368, + "logits/chosen": 13.078730583190918, + "logits/rejected": 13.078730583190918, + "logps/chosen": -4000.498046875, + "logps/rejected": -4000.498046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.2225036621094, + "rewards/margins": 0.0, + "rewards/rejected": -397.2225036621094, + "step": 1066 + }, + { + "epoch": 11.23157894736842, + "grad_norm": 1.81129894372134e-06, + "learning_rate": 0.00017770526315789476, + "logits/chosen": 13.143586158752441, + "logits/rejected": 13.143586158752441, + "logps/chosen": -5172.357421875, + "logps/rejected": -5172.357421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.2973022460938, + "rewards/margins": 0.0, + "rewards/rejected": -514.2973022460938, + "step": 1067 + }, + { + "epoch": 11.242105263157894, + "grad_norm": 1.194509877677774e-06, + "learning_rate": 0.0001776842105263158, + "logits/chosen": 13.130171775817871, + "logits/rejected": 13.130171775817871, + "logps/chosen": -4882.02392578125, + "logps/rejected": -4882.02392578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.355224609375, + "rewards/margins": 0.0, + "rewards/rejected": -485.355224609375, + "step": 1068 + }, + { + "epoch": 11.25263157894737, + "grad_norm": 1.1006327440554742e-06, + "learning_rate": 0.00017766315789473683, + "logits/chosen": 13.133552551269531, + "logits/rejected": 13.133552551269531, + "logps/chosen": -4882.29736328125, + "logps/rejected": -4882.29736328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.382568359375, + "rewards/margins": 0.0, + "rewards/rejected": -485.382568359375, + "step": 1069 + }, + { + "epoch": 11.263157894736842, + "grad_norm": 1.4036988886800827e-06, + "learning_rate": 0.0001776421052631579, + "logits/chosen": 13.090828895568848, + "logits/rejected": 13.090828895568848, + "logps/chosen": -4289.0556640625, + "logps/rejected": -4289.0556640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.10821533203125, + "rewards/margins": 0.0, + "rewards/rejected": -426.10821533203125, + "step": 1070 + }, + { + "epoch": 11.273684210526316, + "grad_norm": 1.9695432911248645e-06, + "learning_rate": 0.00017762105263157896, + "logits/chosen": 13.156347274780273, + "logits/rejected": 13.156347274780273, + "logps/chosen": -5173.0517578125, + "logps/rejected": -5173.0517578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3667602539062, + "rewards/margins": 0.0, + "rewards/rejected": -514.3667602539062, + "step": 1071 + }, + { + "epoch": 11.284210526315789, + "grad_norm": 1.204107661578746e-06, + "learning_rate": 0.0001776, + "logits/chosen": 13.08472728729248, + "logits/rejected": 13.08472728729248, + "logps/chosen": -2672.2265625, + "logps/rejected": -2672.2265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4410705566406, + "rewards/margins": 0.0, + "rewards/rejected": -264.4410705566406, + "step": 1072 + }, + { + "epoch": 11.294736842105262, + "grad_norm": 1.1482010222607641e-06, + "learning_rate": 0.00017757894736842106, + "logits/chosen": 13.156635284423828, + "logits/rejected": 13.156635284423828, + "logps/chosen": -4882.67236328125, + "logps/rejected": -4882.67236328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.4200744628906, + "rewards/margins": 0.0, + "rewards/rejected": -485.4200744628906, + "step": 1073 + }, + { + "epoch": 11.305263157894737, + "grad_norm": 1.6549596466575167e-06, + "learning_rate": 0.00017755789473684213, + "logits/chosen": 13.176777839660645, + "logits/rejected": 13.176777839660645, + "logps/chosen": -5173.92041015625, + "logps/rejected": -5173.92041015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.45361328125, + "rewards/margins": 0.0, + "rewards/rejected": -514.45361328125, + "step": 1074 + }, + { + "epoch": 11.31578947368421, + "grad_norm": 9.766260973265162e-07, + "learning_rate": 0.00017753684210526316, + "logits/chosen": 13.126758575439453, + "logits/rejected": 13.126758575439453, + "logps/chosen": -3759.0625, + "logps/rejected": -3759.0625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9879455566406, + "rewards/margins": 0.0, + "rewards/rejected": -372.9879455566406, + "step": 1075 + }, + { + "epoch": 11.326315789473684, + "grad_norm": 1.608597585800453e-06, + "learning_rate": 0.0001775157894736842, + "logits/chosen": 13.194517135620117, + "logits/rejected": 13.194517135620117, + "logps/chosen": -5175.11328125, + "logps/rejected": -5175.11328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5729370117188, + "rewards/margins": 0.0, + "rewards/rejected": -514.5729370117188, + "step": 1076 + }, + { + "epoch": 11.336842105263157, + "grad_norm": 9.253205917048035e-07, + "learning_rate": 0.00017749473684210528, + "logits/chosen": 13.138322830200195, + "logits/rejected": 13.138322830200195, + "logps/chosen": -2966.755859375, + "logps/rejected": -2966.755859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.94940185546875, + "rewards/margins": 0.0, + "rewards/rejected": -293.94940185546875, + "step": 1077 + }, + { + "epoch": 11.347368421052632, + "grad_norm": 1.484432914367062e-06, + "learning_rate": 0.00017747368421052633, + "logits/chosen": 13.185938835144043, + "logits/rejected": 13.185938835144043, + "logps/chosen": -4322.9375, + "logps/rejected": -4322.9375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.10888671875, + "rewards/margins": 0.0, + "rewards/rejected": -429.10888671875, + "step": 1078 + }, + { + "epoch": 11.357894736842105, + "grad_norm": 1.1060932365580811e-06, + "learning_rate": 0.00017745263157894738, + "logits/chosen": 13.144386291503906, + "logits/rejected": 13.144386291503906, + "logps/chosen": -2671.609375, + "logps/rejected": -2671.609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.3793640136719, + "rewards/margins": 0.0, + "rewards/rejected": -264.3793640136719, + "step": 1079 + }, + { + "epoch": 11.368421052631579, + "grad_norm": 1.4104914498602739e-06, + "learning_rate": 0.00017743157894736843, + "logits/chosen": 13.233640670776367, + "logits/rejected": 13.233640670776367, + "logps/chosen": -5177.072265625, + "logps/rejected": -5177.072265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.768798828125, + "rewards/margins": 0.0, + "rewards/rejected": -514.768798828125, + "step": 1080 + }, + { + "epoch": 11.378947368421052, + "grad_norm": 1.144772681982431e-06, + "learning_rate": 0.00017741052631578948, + "logits/chosen": 13.230626106262207, + "logits/rejected": 13.230626106262207, + "logps/chosen": -4882.15625, + "logps/rejected": -4882.15625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.36846923828125, + "rewards/margins": 0.0, + "rewards/rejected": -485.36846923828125, + "step": 1081 + }, + { + "epoch": 11.389473684210527, + "grad_norm": 1.3507245739674545e-06, + "learning_rate": 0.00017738947368421053, + "logits/chosen": 13.240272521972656, + "logits/rejected": 13.240272521972656, + "logps/chosen": -4882.2900390625, + "logps/rejected": -4882.2900390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.3818359375, + "rewards/margins": 0.0, + "rewards/rejected": -485.3818359375, + "step": 1082 + }, + { + "epoch": 11.4, + "grad_norm": 1.499010068073403e-06, + "learning_rate": 0.00017736842105263158, + "logits/chosen": 13.262801170349121, + "logits/rejected": 13.262801170349121, + "logps/chosen": -5178.076171875, + "logps/rejected": -5178.076171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.8692016601562, + "rewards/margins": 0.0, + "rewards/rejected": -514.8692016601562, + "step": 1083 + }, + { + "epoch": 11.410526315789474, + "grad_norm": 1.2322376505835564e-06, + "learning_rate": 0.00017734736842105265, + "logits/chosen": 13.204524040222168, + "logits/rejected": 13.204524040222168, + "logps/chosen": -2966.556640625, + "logps/rejected": -2966.556640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9294738769531, + "rewards/margins": 0.0, + "rewards/rejected": -293.9294738769531, + "step": 1084 + }, + { + "epoch": 11.421052631578947, + "grad_norm": 1.6729188700992381e-06, + "learning_rate": 0.0001773263157894737, + "logits/chosen": 13.210382461547852, + "logits/rejected": 13.210382461547852, + "logps/chosen": -3774.716796875, + "logps/rejected": -3774.716796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.6025695800781, + "rewards/margins": 0.0, + "rewards/rejected": -374.6025695800781, + "step": 1085 + }, + { + "epoch": 11.431578947368422, + "grad_norm": 1.41476141379826e-06, + "learning_rate": 0.00017730526315789475, + "logits/chosen": 13.279061317443848, + "logits/rejected": 13.279061317443848, + "logps/chosen": -4881.751953125, + "logps/rejected": -4881.751953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.3280334472656, + "rewards/margins": 0.0, + "rewards/rejected": -485.3280334472656, + "step": 1086 + }, + { + "epoch": 11.442105263157895, + "grad_norm": 1.3853941709385253e-06, + "learning_rate": 0.0001772842105263158, + "logits/chosen": 13.228163719177246, + "logits/rejected": 13.228163719177246, + "logps/chosen": -3995.802734375, + "logps/rejected": -3995.802734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7529602050781, + "rewards/margins": 0.0, + "rewards/rejected": -396.7529602050781, + "step": 1087 + }, + { + "epoch": 11.452631578947368, + "grad_norm": 1.7602915249881335e-06, + "learning_rate": 0.00017726315789473685, + "logits/chosen": 13.295538902282715, + "logits/rejected": 13.295538902282715, + "logps/chosen": -4881.90380859375, + "logps/rejected": -4881.90380859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.3432312011719, + "rewards/margins": 0.0, + "rewards/rejected": -485.3432312011719, + "step": 1088 + }, + { + "epoch": 11.463157894736842, + "grad_norm": 1.343849703516753e-06, + "learning_rate": 0.0001772421052631579, + "logits/chosen": 13.286849975585938, + "logits/rejected": 13.286849975585938, + "logps/chosen": -4324.69921875, + "logps/rejected": -4324.69921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.2850646972656, + "rewards/margins": 0.0, + "rewards/rejected": -429.2850646972656, + "step": 1089 + }, + { + "epoch": 11.473684210526315, + "grad_norm": 1.4026127246324904e-06, + "learning_rate": 0.00017722105263157895, + "logits/chosen": 13.257658004760742, + "logits/rejected": 13.257658004760742, + "logps/chosen": -3541.0703125, + "logps/rejected": -3541.0703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.0811462402344, + "rewards/margins": 0.0, + "rewards/rejected": -351.0811462402344, + "step": 1090 + }, + { + "epoch": 11.48421052631579, + "grad_norm": 1.318843260378344e-06, + "learning_rate": 0.0001772, + "logits/chosen": 13.319183349609375, + "logits/rejected": 13.319183349609375, + "logps/chosen": -4882.43115234375, + "logps/rejected": -4882.43115234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.3959655761719, + "rewards/margins": 0.0, + "rewards/rejected": -485.3959655761719, + "step": 1091 + }, + { + "epoch": 11.494736842105263, + "grad_norm": 1.5861633073654957e-06, + "learning_rate": 0.00017717894736842107, + "logits/chosen": 13.263603210449219, + "logits/rejected": 13.263603210449219, + "logps/chosen": -3995.583984375, + "logps/rejected": -3995.583984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7310791015625, + "rewards/margins": 0.0, + "rewards/rejected": -396.7310791015625, + "step": 1092 + }, + { + "epoch": 11.505263157894737, + "grad_norm": 1.3234729294708814e-06, + "learning_rate": 0.00017715789473684212, + "logits/chosen": 13.307832717895508, + "logits/rejected": 13.307832717895508, + "logps/chosen": -4325.40234375, + "logps/rejected": -4325.40234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.3553771972656, + "rewards/margins": 0.0, + "rewards/rejected": -429.3553771972656, + "step": 1093 + }, + { + "epoch": 11.51578947368421, + "grad_norm": 9.968987342290347e-07, + "learning_rate": 0.00017713684210526315, + "logits/chosen": 13.269745826721191, + "logits/rejected": 13.269745826721191, + "logps/chosen": -3541.666015625, + "logps/rejected": -3541.666015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1407165527344, + "rewards/margins": 0.0, + "rewards/rejected": -351.1407165527344, + "step": 1094 + }, + { + "epoch": 11.526315789473685, + "grad_norm": 1.7469242266088258e-06, + "learning_rate": 0.00017711578947368422, + "logits/chosen": 13.322668075561523, + "logits/rejected": 13.322668075561523, + "logps/chosen": -4882.68701171875, + "logps/rejected": -4882.68701171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.4215393066406, + "rewards/margins": 0.0, + "rewards/rejected": -485.4215393066406, + "step": 1095 + }, + { + "epoch": 11.536842105263158, + "grad_norm": 9.031431886796781e-07, + "learning_rate": 0.00017709473684210527, + "logits/chosen": 13.272272109985352, + "logits/rejected": 13.272272109985352, + "logps/chosen": -3542.076171875, + "logps/rejected": -3542.076171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1817321777344, + "rewards/margins": 0.0, + "rewards/rejected": -351.1817321777344, + "step": 1096 + }, + { + "epoch": 11.547368421052632, + "grad_norm": 8.914427667150449e-07, + "learning_rate": 0.00017707368421052632, + "logits/chosen": 13.276670455932617, + "logits/rejected": 13.276670455932617, + "logps/chosen": -3542.412109375, + "logps/rejected": -3542.412109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.21533203125, + "rewards/margins": 0.0, + "rewards/rejected": -351.21533203125, + "step": 1097 + }, + { + "epoch": 11.557894736842105, + "grad_norm": 8.729178375688207e-07, + "learning_rate": 0.00017705263157894737, + "logits/chosen": 13.282588958740234, + "logits/rejected": 13.282588958740234, + "logps/chosen": -3542.728515625, + "logps/rejected": -3542.728515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2469787597656, + "rewards/margins": 0.0, + "rewards/rejected": -351.2469787597656, + "step": 1098 + }, + { + "epoch": 11.568421052631578, + "grad_norm": 1.7410309283150127e-06, + "learning_rate": 0.00017703157894736845, + "logits/chosen": 13.342294692993164, + "logits/rejected": 13.342294692993164, + "logps/chosen": -4882.4658203125, + "logps/rejected": -4882.4658203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.3994140625, + "rewards/margins": 0.0, + "rewards/rejected": -485.3994140625, + "step": 1099 + }, + { + "epoch": 11.578947368421053, + "grad_norm": 8.881889357326145e-07, + "learning_rate": 0.00017701052631578947, + "logits/chosen": 13.29837703704834, + "logits/rejected": 13.29837703704834, + "logps/chosen": -3543.39453125, + "logps/rejected": -3543.39453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3135681152344, + "rewards/margins": 0.0, + "rewards/rejected": -351.3135681152344, + "step": 1100 + }, + { + "epoch": 11.578947368421053, + "eval_logits/chosen": 13.338384628295898, + "eval_logits/rejected": 13.338384628295898, + "eval_logps/chosen": -4311.4453125, + "eval_logps/rejected": -4311.4453125, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.2413635253906, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.2413635253906, + "eval_runtime": 4.2993, + "eval_samples_per_second": 2.326, + "eval_steps_per_second": 2.326, + "step": 1100 + }, + { + "epoch": 11.589473684210526, + "grad_norm": 1.3109354313201038e-06, + "learning_rate": 0.00017698947368421052, + "logits/chosen": 13.37167739868164, + "logits/rejected": 13.37167739868164, + "logps/chosen": -5178.341796875, + "logps/rejected": -5178.341796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.895751953125, + "rewards/margins": 0.0, + "rewards/rejected": -514.895751953125, + "step": 1101 + }, + { + "epoch": 11.6, + "grad_norm": 1.0846333680092357e-06, + "learning_rate": 0.0001769684210526316, + "logits/chosen": 13.314481735229492, + "logits/rejected": 13.314481735229492, + "logps/chosen": -3544.10546875, + "logps/rejected": -3544.10546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3846740722656, + "rewards/margins": 0.0, + "rewards/rejected": -351.3846740722656, + "step": 1102 + }, + { + "epoch": 11.610526315789473, + "grad_norm": 1.0255636198053253e-06, + "learning_rate": 0.00017694736842105264, + "logits/chosen": 13.322269439697266, + "logits/rejected": 13.322269439697266, + "logps/chosen": -3544.44921875, + "logps/rejected": -3544.44921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4190368652344, + "rewards/margins": 0.0, + "rewards/rejected": -351.4190368652344, + "step": 1103 + }, + { + "epoch": 11.621052631578948, + "grad_norm": 1.0394411447123275e-06, + "learning_rate": 0.0001769263157894737, + "logits/chosen": 13.313828468322754, + "logits/rejected": 13.313828468322754, + "logps/chosen": -2671.3203125, + "logps/rejected": -2671.3203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.3504333496094, + "rewards/margins": 0.0, + "rewards/rejected": -264.3504333496094, + "step": 1104 + }, + { + "epoch": 11.631578947368421, + "grad_norm": 1.5713408174633514e-06, + "learning_rate": 0.00017690526315789474, + "logits/chosen": 13.326851844787598, + "logits/rejected": 13.326851844787598, + "logps/chosen": -3995.94921875, + "logps/rejected": -3995.94921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7676086425781, + "rewards/margins": 0.0, + "rewards/rejected": -396.7676086425781, + "step": 1105 + }, + { + "epoch": 11.642105263157895, + "grad_norm": 2.2194963094079867e-06, + "learning_rate": 0.00017688421052631582, + "logits/chosen": 13.398720741271973, + "logits/rejected": 13.398720741271973, + "logps/chosen": -5178.267578125, + "logps/rejected": -5178.267578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.8883666992188, + "rewards/margins": 0.0, + "rewards/rejected": -514.8883666992188, + "step": 1106 + }, + { + "epoch": 11.652631578947368, + "grad_norm": 7.660092364858428e-07, + "learning_rate": 0.00017686315789473684, + "logits/chosen": 13.33483600616455, + "logits/rejected": 13.33483600616455, + "logps/chosen": -3545.88671875, + "logps/rejected": -3545.88671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.56280517578125, + "rewards/margins": 0.0, + "rewards/rejected": -351.56280517578125, + "step": 1107 + }, + { + "epoch": 11.663157894736843, + "grad_norm": 1.2355778835626552e-06, + "learning_rate": 0.0001768421052631579, + "logits/chosen": 13.369561195373535, + "logits/rejected": 13.369561195373535, + "logps/chosen": -4327.91796875, + "logps/rejected": -4327.91796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.60693359375, + "rewards/margins": 0.0, + "rewards/rejected": -429.60693359375, + "step": 1108 + }, + { + "epoch": 11.673684210526316, + "grad_norm": 1.5587136203976115e-06, + "learning_rate": 0.00017682105263157897, + "logits/chosen": 13.324060440063477, + "logits/rejected": 13.324060440063477, + "logps/chosen": -3995.958984375, + "logps/rejected": -3995.958984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7685852050781, + "rewards/margins": 0.0, + "rewards/rejected": -396.7685852050781, + "step": 1109 + }, + { + "epoch": 11.68421052631579, + "grad_norm": 1.2366976989142131e-06, + "learning_rate": 0.00017680000000000001, + "logits/chosen": 13.309664726257324, + "logits/rejected": 13.309664726257324, + "logps/chosen": -2671.439453125, + "logps/rejected": -2671.439453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.36236572265625, + "rewards/margins": 0.0, + "rewards/rejected": -264.36236572265625, + "step": 1110 + }, + { + "epoch": 11.694736842105263, + "grad_norm": 1.6387425603170414e-06, + "learning_rate": 0.00017677894736842106, + "logits/chosen": 13.319222450256348, + "logits/rejected": 13.319222450256348, + "logps/chosen": -4284.74609375, + "logps/rejected": -4284.74609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.67724609375, + "rewards/margins": 0.0, + "rewards/rejected": -425.67724609375, + "step": 1111 + }, + { + "epoch": 11.705263157894738, + "grad_norm": 1.3411945474217646e-06, + "learning_rate": 0.0001767578947368421, + "logits/chosen": 13.297146797180176, + "logits/rejected": 13.297146797180176, + "logps/chosen": -3996.51953125, + "logps/rejected": -3996.51953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.82464599609375, + "rewards/margins": 0.0, + "rewards/rejected": -396.82464599609375, + "step": 1112 + }, + { + "epoch": 11.715789473684211, + "grad_norm": 8.575942729294184e-07, + "learning_rate": 0.00017673684210526316, + "logits/chosen": 13.289884567260742, + "logits/rejected": 13.289884567260742, + "logps/chosen": -2967.37109375, + "logps/rejected": -2967.37109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.01092529296875, + "rewards/margins": 0.0, + "rewards/rejected": -294.01092529296875, + "step": 1113 + }, + { + "epoch": 11.726315789473684, + "grad_norm": 8.83433528997557e-07, + "learning_rate": 0.0001767157894736842, + "logits/chosen": 13.260509490966797, + "logits/rejected": 13.260509490966797, + "logps/chosen": -2672.458984375, + "logps/rejected": -2672.458984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4643249511719, + "rewards/margins": 0.0, + "rewards/rejected": -264.4643249511719, + "step": 1114 + }, + { + "epoch": 11.736842105263158, + "grad_norm": 8.83172560861567e-07, + "learning_rate": 0.00017669473684210526, + "logits/chosen": 13.247953414916992, + "logits/rejected": 13.247953414916992, + "logps/chosen": -2672.71875, + "logps/rejected": -2672.71875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.49029541015625, + "rewards/margins": 0.0, + "rewards/rejected": -264.49029541015625, + "step": 1115 + }, + { + "epoch": 11.74736842105263, + "grad_norm": 1.8588118564366596e-06, + "learning_rate": 0.00017667368421052634, + "logits/chosen": 13.254203796386719, + "logits/rejected": 13.254203796386719, + "logps/chosen": -3756.74609375, + "logps/rejected": -3756.74609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.7563171386719, + "rewards/margins": 0.0, + "rewards/rejected": -372.7563171386719, + "step": 1116 + }, + { + "epoch": 11.757894736842106, + "grad_norm": 1.3917166370447376e-06, + "learning_rate": 0.0001766526315789474, + "logits/chosen": 13.24231243133545, + "logits/rejected": 13.24231243133545, + "logps/chosen": -3756.95703125, + "logps/rejected": -3756.95703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.77740478515625, + "rewards/margins": 0.0, + "rewards/rejected": -372.77740478515625, + "step": 1117 + }, + { + "epoch": 11.76842105263158, + "grad_norm": 1.5146015357458964e-06, + "learning_rate": 0.00017663157894736844, + "logits/chosen": 13.291352272033691, + "logits/rejected": 13.291352272033691, + "logps/chosen": -5176.9404296875, + "logps/rejected": -5176.9404296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.755615234375, + "rewards/margins": 0.0, + "rewards/rejected": -514.755615234375, + "step": 1118 + }, + { + "epoch": 11.778947368421052, + "grad_norm": 1.2394157238304615e-06, + "learning_rate": 0.00017661052631578949, + "logits/chosen": 13.221146583557129, + "logits/rejected": 13.221146583557129, + "logps/chosen": -3546.5751953125, + "logps/rejected": -3546.5751953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.63165283203125, + "rewards/margins": 0.0, + "rewards/rejected": -351.63165283203125, + "step": 1119 + }, + { + "epoch": 11.789473684210526, + "grad_norm": 1.8103849015460582e-06, + "learning_rate": 0.00017658947368421053, + "logits/chosen": 13.210124969482422, + "logits/rejected": 13.210124969482422, + "logps/chosen": -3775.94140625, + "logps/rejected": -3775.94140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.72503662109375, + "rewards/margins": 0.0, + "rewards/rejected": -374.72503662109375, + "step": 1120 + }, + { + "epoch": 11.8, + "grad_norm": 1.1883638535437058e-06, + "learning_rate": 0.00017656842105263158, + "logits/chosen": 13.2149076461792, + "logits/rejected": 13.2149076461792, + "logps/chosen": -3546.697265625, + "logps/rejected": -3546.697265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.64385986328125, + "rewards/margins": 0.0, + "rewards/rejected": -351.64385986328125, + "step": 1121 + }, + { + "epoch": 11.810526315789474, + "grad_norm": 1.2033098073516157e-06, + "learning_rate": 0.00017654736842105263, + "logits/chosen": 13.205141067504883, + "logits/rejected": 13.205141067504883, + "logps/chosen": -3997.81640625, + "logps/rejected": -3997.81640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9543151855469, + "rewards/margins": 0.0, + "rewards/rejected": -396.9543151855469, + "step": 1122 + }, + { + "epoch": 11.821052631578947, + "grad_norm": 1.589643829902343e-06, + "learning_rate": 0.00017652631578947368, + "logits/chosen": 13.206165313720703, + "logits/rejected": 13.206165313720703, + "logps/chosen": -3776.43359375, + "logps/rejected": -3776.43359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7742614746094, + "rewards/margins": 0.0, + "rewards/rejected": -374.7742614746094, + "step": 1123 + }, + { + "epoch": 11.83157894736842, + "grad_norm": 8.367911163986719e-07, + "learning_rate": 0.00017650526315789476, + "logits/chosen": 13.212361335754395, + "logits/rejected": 13.212361335754395, + "logps/chosen": -3547.021484375, + "logps/rejected": -3547.021484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.67626953125, + "rewards/margins": 0.0, + "rewards/rejected": -351.67626953125, + "step": 1124 + }, + { + "epoch": 11.842105263157894, + "grad_norm": 1.5414308336403337e-06, + "learning_rate": 0.0001764842105263158, + "logits/chosen": 13.205819129943848, + "logits/rejected": 13.205819129943848, + "logps/chosen": -3777.16015625, + "logps/rejected": -3777.16015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8468933105469, + "rewards/margins": 0.0, + "rewards/rejected": -374.8468933105469, + "step": 1125 + }, + { + "epoch": 11.852631578947369, + "grad_norm": 1.4040676887816517e-06, + "learning_rate": 0.00017646315789473683, + "logits/chosen": 13.207777976989746, + "logits/rejected": 13.207777976989746, + "logps/chosen": -3777.712890625, + "logps/rejected": -3777.712890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9021911621094, + "rewards/margins": 0.0, + "rewards/rejected": -374.9021911621094, + "step": 1126 + }, + { + "epoch": 11.863157894736842, + "grad_norm": 1.2888542642031098e-06, + "learning_rate": 0.0001764421052631579, + "logits/chosen": 13.249421119689941, + "logits/rejected": 13.249421119689941, + "logps/chosen": -4329.033203125, + "logps/rejected": -4329.033203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.7184753417969, + "rewards/margins": 0.0, + "rewards/rejected": -429.7184753417969, + "step": 1127 + }, + { + "epoch": 11.873684210526315, + "grad_norm": 1.69546319739311e-06, + "learning_rate": 0.00017642105263157896, + "logits/chosen": 13.215753555297852, + "logits/rejected": 13.215753555297852, + "logps/chosen": -4287.22314453125, + "logps/rejected": -4287.22314453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9249572753906, + "rewards/margins": 0.0, + "rewards/rejected": -425.9249572753906, + "step": 1128 + }, + { + "epoch": 11.884210526315789, + "grad_norm": 1.188335545521113e-06, + "learning_rate": 0.0001764, + "logits/chosen": 13.210471153259277, + "logits/rejected": 13.210471153259277, + "logps/chosen": -2968.7529296875, + "logps/rejected": -2968.7529296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.14910888671875, + "rewards/margins": 0.0, + "rewards/rejected": -294.14910888671875, + "step": 1129 + }, + { + "epoch": 11.894736842105264, + "grad_norm": 1.3131199239069247e-06, + "learning_rate": 0.00017637894736842105, + "logits/chosen": 13.204744338989258, + "logits/rejected": 13.204744338989258, + "logps/chosen": -3779.94140625, + "logps/rejected": -3779.94140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.1250305175781, + "rewards/margins": 0.0, + "rewards/rejected": -375.1250305175781, + "step": 1130 + }, + { + "epoch": 11.905263157894737, + "grad_norm": 7.731790105935943e-07, + "learning_rate": 0.00017635789473684213, + "logits/chosen": 13.209800720214844, + "logits/rejected": 13.209800720214844, + "logps/chosen": -3546.8916015625, + "logps/rejected": -3546.8916015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.6632995605469, + "rewards/margins": 0.0, + "rewards/rejected": -351.6632995605469, + "step": 1131 + }, + { + "epoch": 11.91578947368421, + "grad_norm": 1.980549313884694e-06, + "learning_rate": 0.00017633684210526315, + "logits/chosen": 13.270223617553711, + "logits/rejected": 13.270223617553711, + "logps/chosen": -5173.8330078125, + "logps/rejected": -5173.8330078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4448852539062, + "rewards/margins": 0.0, + "rewards/rejected": -514.4448852539062, + "step": 1132 + }, + { + "epoch": 11.926315789473684, + "grad_norm": 1.5226237337628845e-06, + "learning_rate": 0.0001763157894736842, + "logits/chosen": 13.201573371887207, + "logits/rejected": 13.201573371887207, + "logps/chosen": -3996.80859375, + "logps/rejected": -3996.80859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8535461425781, + "rewards/margins": 0.0, + "rewards/rejected": -396.8535461425781, + "step": 1133 + }, + { + "epoch": 11.936842105263159, + "grad_norm": 1.4161789749778109e-06, + "learning_rate": 0.00017629473684210528, + "logits/chosen": 13.21018123626709, + "logits/rejected": 13.21018123626709, + "logps/chosen": -4288.837890625, + "logps/rejected": -4288.837890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.08642578125, + "rewards/margins": 0.0, + "rewards/rejected": -426.08642578125, + "step": 1134 + }, + { + "epoch": 11.947368421052632, + "grad_norm": 2.034541466855444e-06, + "learning_rate": 0.00017627368421052633, + "logits/chosen": 13.268797874450684, + "logits/rejected": 13.268797874450684, + "logps/chosen": -5173.50341796875, + "logps/rejected": -5173.50341796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4119262695312, + "rewards/margins": 0.0, + "rewards/rejected": -514.4119262695312, + "step": 1135 + }, + { + "epoch": 11.957894736842105, + "grad_norm": 1.365693606203422e-06, + "learning_rate": 0.00017625263157894738, + "logits/chosen": 13.193127632141113, + "logits/rejected": 13.193127632141113, + "logps/chosen": -3996.896484375, + "logps/rejected": -3996.896484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8623352050781, + "rewards/margins": 0.0, + "rewards/rejected": -396.8623352050781, + "step": 1136 + }, + { + "epoch": 11.968421052631578, + "grad_norm": 1.0118862974195508e-06, + "learning_rate": 0.00017623157894736843, + "logits/chosen": 13.196903228759766, + "logits/rejected": 13.196903228759766, + "logps/chosen": -3546.19921875, + "logps/rejected": -3546.19921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.59405517578125, + "rewards/margins": 0.0, + "rewards/rejected": -351.59405517578125, + "step": 1137 + }, + { + "epoch": 11.978947368421053, + "grad_norm": 9.494154937783605e-07, + "learning_rate": 0.0001762105263157895, + "logits/chosen": 13.174653053283691, + "logits/rejected": 13.174653053283691, + "logps/chosen": -2675.28173828125, + "logps/rejected": -2675.28173828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.74658203125, + "rewards/margins": 0.0, + "rewards/rejected": -264.74658203125, + "step": 1138 + }, + { + "epoch": 11.989473684210527, + "grad_norm": 8.6102767227203e-07, + "learning_rate": 0.00017618947368421052, + "logits/chosen": 13.169896125793457, + "logits/rejected": 13.169896125793457, + "logps/chosen": -2675.30322265625, + "logps/rejected": -2675.30322265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.7487487792969, + "rewards/margins": 0.0, + "rewards/rejected": -264.7487487792969, + "step": 1139 + }, + { + "epoch": 12.0, + "grad_norm": 1.532814735583088e-06, + "learning_rate": 0.00017616842105263157, + "logits/chosen": 13.172043800354004, + "logits/rejected": 13.172043800354004, + "logps/chosen": -3997.353515625, + "logps/rejected": -3997.353515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9080505371094, + "rewards/margins": 0.0, + "rewards/rejected": -396.9080505371094, + "step": 1140 + }, + { + "epoch": 12.010526315789473, + "grad_norm": 2.001610027946299e-06, + "learning_rate": 0.00017614736842105265, + "logits/chosen": 13.223211288452148, + "logits/rejected": 13.223211288452148, + "logps/chosen": -4875.69970703125, + "logps/rejected": -4875.69970703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.7228088378906, + "rewards/margins": 0.0, + "rewards/rejected": -484.7228088378906, + "step": 1141 + }, + { + "epoch": 12.021052631578947, + "grad_norm": 1.4257593647926114e-06, + "learning_rate": 0.0001761263157894737, + "logits/chosen": 13.205347061157227, + "logits/rejected": 13.205347061157227, + "logps/chosen": -4328.4833984375, + "logps/rejected": -4328.4833984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.6634826660156, + "rewards/margins": 0.0, + "rewards/rejected": -429.6634826660156, + "step": 1142 + }, + { + "epoch": 12.031578947368422, + "grad_norm": 1.1494424825286842e-06, + "learning_rate": 0.00017610526315789475, + "logits/chosen": 13.158509254455566, + "logits/rejected": 13.158509254455566, + "logps/chosen": -3997.775390625, + "logps/rejected": -3997.775390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9502258300781, + "rewards/margins": 0.0, + "rewards/rejected": -396.9502258300781, + "step": 1143 + }, + { + "epoch": 12.042105263157895, + "grad_norm": 2.3392249204334803e-06, + "learning_rate": 0.0001760842105263158, + "logits/chosen": 13.226460456848145, + "logits/rejected": 13.226460456848145, + "logps/chosen": -5173.70361328125, + "logps/rejected": -5173.70361328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4319458007812, + "rewards/margins": 0.0, + "rewards/rejected": -514.4319458007812, + "step": 1144 + }, + { + "epoch": 12.052631578947368, + "grad_norm": 9.74021077126963e-07, + "learning_rate": 0.00017606315789473685, + "logits/chosen": 13.162618637084961, + "logits/rejected": 13.162618637084961, + "logps/chosen": -3545.4638671875, + "logps/rejected": -3545.4638671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5205078125, + "rewards/margins": 0.0, + "rewards/rejected": -351.5205078125, + "step": 1145 + }, + { + "epoch": 12.063157894736841, + "grad_norm": 8.510671705153072e-07, + "learning_rate": 0.0001760421052631579, + "logits/chosen": 13.16342830657959, + "logits/rejected": 13.16342830657959, + "logps/chosen": -3545.2705078125, + "logps/rejected": -3545.2705078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5011901855469, + "rewards/margins": 0.0, + "rewards/rejected": -351.5011901855469, + "step": 1146 + }, + { + "epoch": 12.073684210526316, + "grad_norm": 8.024080671020783e-07, + "learning_rate": 0.00017602105263157895, + "logits/chosen": 13.149815559387207, + "logits/rejected": 13.149815559387207, + "logps/chosen": -2675.3203125, + "logps/rejected": -2675.3203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.7504577636719, + "rewards/margins": 0.0, + "rewards/rejected": -264.7504577636719, + "step": 1147 + }, + { + "epoch": 12.08421052631579, + "grad_norm": 1.6834076177474344e-06, + "learning_rate": 0.00017600000000000002, + "logits/chosen": 13.15926456451416, + "logits/rejected": 13.15926456451416, + "logps/chosen": -3998.701171875, + "logps/rejected": -3998.701171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0428161621094, + "rewards/margins": 0.0, + "rewards/rejected": -397.0428161621094, + "step": 1148 + }, + { + "epoch": 12.094736842105263, + "grad_norm": 1.5382627225335455e-06, + "learning_rate": 0.00017597894736842107, + "logits/chosen": 13.157814979553223, + "logits/rejected": 13.157814979553223, + "logps/chosen": -3998.921875, + "logps/rejected": -3998.921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.06488037109375, + "rewards/margins": 0.0, + "rewards/rejected": -397.06488037109375, + "step": 1149 + }, + { + "epoch": 12.105263157894736, + "grad_norm": 1.5462560440937523e-06, + "learning_rate": 0.00017595789473684212, + "logits/chosen": 13.224363327026367, + "logits/rejected": 13.224363327026367, + "logps/chosen": -5174.9169921875, + "logps/rejected": -5174.9169921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5532836914062, + "rewards/margins": 0.0, + "rewards/rejected": -514.5532836914062, + "step": 1150 + }, + { + "epoch": 12.105263157894736, + "eval_logits/chosen": 13.188931465148926, + "eval_logits/rejected": 13.188931465148926, + "eval_logps/chosen": -4310.96630859375, + "eval_logps/rejected": -4310.96630859375, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.1934509277344, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.1934509277344, + "eval_runtime": 4.4204, + "eval_samples_per_second": 2.262, + "eval_steps_per_second": 2.262, + "step": 1150 + }, + { + "epoch": 12.115789473684211, + "grad_norm": 1.0568624020379502e-06, + "learning_rate": 0.00017593684210526314, + "logits/chosen": 13.157355308532715, + "logits/rejected": 13.157355308532715, + "logps/chosen": -2968.5068359375, + "logps/rejected": -2968.5068359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1244812011719, + "rewards/margins": 0.0, + "rewards/rejected": -294.1244812011719, + "step": 1151 + }, + { + "epoch": 12.126315789473685, + "grad_norm": 1.1804228279288509e-06, + "learning_rate": 0.00017591578947368422, + "logits/chosen": 13.154706001281738, + "logits/rejected": 13.154706001281738, + "logps/chosen": -2968.431640625, + "logps/rejected": -2968.431640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1169738769531, + "rewards/margins": 0.0, + "rewards/rejected": -294.1169738769531, + "step": 1152 + }, + { + "epoch": 12.136842105263158, + "grad_norm": 1.3160945400159108e-06, + "learning_rate": 0.00017589473684210527, + "logits/chosen": 13.190837860107422, + "logits/rejected": 13.190837860107422, + "logps/chosen": -4328.025390625, + "logps/rejected": -4328.025390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.61767578125, + "rewards/margins": 0.0, + "rewards/rejected": -429.61767578125, + "step": 1153 + }, + { + "epoch": 12.147368421052631, + "grad_norm": 9.006783443510358e-07, + "learning_rate": 0.00017587368421052632, + "logits/chosen": 13.138744354248047, + "logits/rejected": 13.138744354248047, + "logps/chosen": -2675.0927734375, + "logps/rejected": -2675.0927734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.7276916503906, + "rewards/margins": 0.0, + "rewards/rejected": -264.7276916503906, + "step": 1154 + }, + { + "epoch": 12.157894736842104, + "grad_norm": 1.6588943481110618e-06, + "learning_rate": 0.00017585263157894737, + "logits/chosen": 13.204049110412598, + "logits/rejected": 13.204049110412598, + "logps/chosen": -4877.3251953125, + "logps/rejected": -4877.3251953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.8853454589844, + "rewards/margins": 0.0, + "rewards/rejected": -484.8853454589844, + "step": 1155 + }, + { + "epoch": 12.16842105263158, + "grad_norm": 2.179196371798753e-06, + "learning_rate": 0.00017583157894736844, + "logits/chosen": 13.219549179077148, + "logits/rejected": 13.219549179077148, + "logps/chosen": -5175.49853515625, + "logps/rejected": -5175.49853515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6114501953125, + "rewards/margins": 0.0, + "rewards/rejected": -514.6114501953125, + "step": 1156 + }, + { + "epoch": 12.178947368421053, + "grad_norm": 8.472194394926191e-07, + "learning_rate": 0.0001758105263157895, + "logits/chosen": 13.167341232299805, + "logits/rejected": 13.167341232299805, + "logps/chosen": -3545.0927734375, + "logps/rejected": -3545.0927734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4833984375, + "rewards/margins": 0.0, + "rewards/rejected": -351.4833984375, + "step": 1157 + }, + { + "epoch": 12.189473684210526, + "grad_norm": 8.551896257813496e-07, + "learning_rate": 0.00017578947368421052, + "logits/chosen": 13.175485610961914, + "logits/rejected": 13.175485610961914, + "logps/chosen": -2968.9130859375, + "logps/rejected": -2968.9130859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1651306152344, + "rewards/margins": 0.0, + "rewards/rejected": -294.1651306152344, + "step": 1158 + }, + { + "epoch": 12.2, + "grad_norm": 1.011404378914449e-06, + "learning_rate": 0.0001757684210526316, + "logits/chosen": 13.16876220703125, + "logits/rejected": 13.16876220703125, + "logps/chosen": -2675.162109375, + "logps/rejected": -2675.162109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.734619140625, + "rewards/margins": 0.0, + "rewards/rejected": -264.734619140625, + "step": 1159 + }, + { + "epoch": 12.210526315789474, + "grad_norm": 9.106698257710377e-07, + "learning_rate": 0.00017574736842105264, + "logits/chosen": 13.194976806640625, + "logits/rejected": 13.194976806640625, + "logps/chosen": -3545.2578125, + "logps/rejected": -3545.2578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4999084472656, + "rewards/margins": 0.0, + "rewards/rejected": -351.4999084472656, + "step": 1160 + }, + { + "epoch": 12.221052631578948, + "grad_norm": 1.2417212928994559e-06, + "learning_rate": 0.0001757263157894737, + "logits/chosen": 13.19233226776123, + "logits/rejected": 13.19233226776123, + "logps/chosen": -3998.916015625, + "logps/rejected": -3998.916015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0643005371094, + "rewards/margins": 0.0, + "rewards/rejected": -397.0643005371094, + "step": 1161 + }, + { + "epoch": 12.23157894736842, + "grad_norm": 1.2281788031032193e-06, + "learning_rate": 0.00017570526315789474, + "logits/chosen": 13.196739196777344, + "logits/rejected": 13.196739196777344, + "logps/chosen": -3998.84765625, + "logps/rejected": -3998.84765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0574645996094, + "rewards/margins": 0.0, + "rewards/rejected": -397.0574645996094, + "step": 1162 + }, + { + "epoch": 12.242105263157894, + "grad_norm": 7.93373601482017e-07, + "learning_rate": 0.00017568421052631582, + "logits/chosen": 13.208879470825195, + "logits/rejected": 13.208879470825195, + "logps/chosen": -3545.5859375, + "logps/rejected": -3545.5859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.53271484375, + "rewards/margins": 0.0, + "rewards/rejected": -351.53271484375, + "step": 1163 + }, + { + "epoch": 12.25263157894737, + "grad_norm": 1.1000822723872261e-06, + "learning_rate": 0.00017566315789473684, + "logits/chosen": 13.210315704345703, + "logits/rejected": 13.210315704345703, + "logps/chosen": -3757.365234375, + "logps/rejected": -3757.365234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8182067871094, + "rewards/margins": 0.0, + "rewards/rejected": -372.8182067871094, + "step": 1164 + }, + { + "epoch": 12.263157894736842, + "grad_norm": 1.4750858099432662e-06, + "learning_rate": 0.0001756421052631579, + "logits/chosen": 13.244125366210938, + "logits/rejected": 13.244125366210938, + "logps/chosen": -4328.5107421875, + "logps/rejected": -4328.5107421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.6662292480469, + "rewards/margins": 0.0, + "rewards/rejected": -429.6662292480469, + "step": 1165 + }, + { + "epoch": 12.273684210526316, + "grad_norm": 7.83409063842555e-07, + "learning_rate": 0.00017562105263157896, + "logits/chosen": 13.211919784545898, + "logits/rejected": 13.211919784545898, + "logps/chosen": -3545.8154296875, + "logps/rejected": -3545.8154296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5556640625, + "rewards/margins": 0.0, + "rewards/rejected": -351.5556640625, + "step": 1166 + }, + { + "epoch": 12.284210526315789, + "grad_norm": 8.064207008828816e-07, + "learning_rate": 0.0001756, + "logits/chosen": 13.212503433227539, + "logits/rejected": 13.212503433227539, + "logps/chosen": -2969.1298828125, + "logps/rejected": -2969.1298828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1867980957031, + "rewards/margins": 0.0, + "rewards/rejected": -294.1867980957031, + "step": 1167 + }, + { + "epoch": 12.294736842105262, + "grad_norm": 7.703820301685482e-07, + "learning_rate": 0.00017557894736842106, + "logits/chosen": 13.216180801391602, + "logits/rejected": 13.216180801391602, + "logps/chosen": -3546.19140625, + "logps/rejected": -3546.19140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.59326171875, + "rewards/margins": 0.0, + "rewards/rejected": -351.59326171875, + "step": 1168 + }, + { + "epoch": 12.305263157894737, + "grad_norm": 1.4685492715216242e-06, + "learning_rate": 0.0001755578947368421, + "logits/chosen": 13.211734771728516, + "logits/rejected": 13.211734771728516, + "logps/chosen": -3776.330078125, + "logps/rejected": -3776.330078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7638854980469, + "rewards/margins": 0.0, + "rewards/rejected": -374.7638854980469, + "step": 1169 + }, + { + "epoch": 12.31578947368421, + "grad_norm": 1.6122794477269053e-06, + "learning_rate": 0.0001755368421052632, + "logits/chosen": 13.268331527709961, + "logits/rejected": 13.268331527709961, + "logps/chosen": -4876.88037109375, + "logps/rejected": -4876.88037109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.84088134765625, + "rewards/margins": 0.0, + "rewards/rejected": -484.84088134765625, + "step": 1170 + }, + { + "epoch": 12.326315789473684, + "grad_norm": 1.5342782262450783e-06, + "learning_rate": 0.0001755157894736842, + "logits/chosen": 13.269502639770508, + "logits/rejected": 13.269502639770508, + "logps/chosen": -4877.0869140625, + "logps/rejected": -4877.0869140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.8615417480469, + "rewards/margins": 0.0, + "rewards/rejected": -484.8615417480469, + "step": 1171 + }, + { + "epoch": 12.336842105263157, + "grad_norm": 9.192314678330149e-07, + "learning_rate": 0.00017549473684210526, + "logits/chosen": 13.204910278320312, + "logits/rejected": 13.204910278320312, + "logps/chosen": -2674.8876953125, + "logps/rejected": -2674.8876953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.7071838378906, + "rewards/margins": 0.0, + "rewards/rejected": -264.7071838378906, + "step": 1172 + }, + { + "epoch": 12.347368421052632, + "grad_norm": 1.0172065003644093e-06, + "learning_rate": 0.00017547368421052634, + "logits/chosen": 13.221443176269531, + "logits/rejected": 13.221443176269531, + "logps/chosen": -3758.3583984375, + "logps/rejected": -3758.3583984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.91754150390625, + "rewards/margins": 0.0, + "rewards/rejected": -372.91754150390625, + "step": 1173 + }, + { + "epoch": 12.357894736842105, + "grad_norm": 7.945783977447718e-07, + "learning_rate": 0.00017545263157894738, + "logits/chosen": 13.219433784484863, + "logits/rejected": 13.219433784484863, + "logps/chosen": -3546.3076171875, + "logps/rejected": -3546.3076171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.6048889160156, + "rewards/margins": 0.0, + "rewards/rejected": -351.6048889160156, + "step": 1174 + }, + { + "epoch": 12.368421052631579, + "grad_norm": 1.0575088253972353e-06, + "learning_rate": 0.00017543157894736843, + "logits/chosen": 13.218424797058105, + "logits/rejected": 13.218424797058105, + "logps/chosen": -3758.75390625, + "logps/rejected": -3758.75390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.95709228515625, + "rewards/margins": 0.0, + "rewards/rejected": -372.95709228515625, + "step": 1175 + }, + { + "epoch": 12.378947368421052, + "grad_norm": 8.342209980582993e-07, + "learning_rate": 0.00017541052631578948, + "logits/chosen": 13.199202537536621, + "logits/rejected": 13.199202537536621, + "logps/chosen": -2674.888671875, + "logps/rejected": -2674.888671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.707275390625, + "rewards/margins": 0.0, + "rewards/rejected": -264.707275390625, + "step": 1176 + }, + { + "epoch": 12.389473684210527, + "grad_norm": 1.578581191097328e-06, + "learning_rate": 0.00017538947368421053, + "logits/chosen": 13.214439392089844, + "logits/rejected": 13.214439392089844, + "logps/chosen": -4287.619140625, + "logps/rejected": -4287.619140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9645690917969, + "rewards/margins": 0.0, + "rewards/rejected": -425.9645690917969, + "step": 1177 + }, + { + "epoch": 12.4, + "grad_norm": 1.4678345223728684e-06, + "learning_rate": 0.00017536842105263158, + "logits/chosen": 13.2723388671875, + "logits/rejected": 13.2723388671875, + "logps/chosen": -5175.3310546875, + "logps/rejected": -5175.3310546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5946655273438, + "rewards/margins": 0.0, + "rewards/rejected": -514.5946655273438, + "step": 1178 + }, + { + "epoch": 12.410526315789474, + "grad_norm": 1.457522216696816e-06, + "learning_rate": 0.00017534736842105263, + "logits/chosen": 13.203184127807617, + "logits/rejected": 13.203184127807617, + "logps/chosen": -3777.3095703125, + "logps/rejected": -3777.3095703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8618469238281, + "rewards/margins": 0.0, + "rewards/rejected": -374.8618469238281, + "step": 1179 + }, + { + "epoch": 12.421052631578947, + "grad_norm": 1.8166482504966552e-06, + "learning_rate": 0.0001753263157894737, + "logits/chosen": 13.256892204284668, + "logits/rejected": 13.256892204284668, + "logps/chosen": -4878.2294921875, + "logps/rejected": -4878.2294921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9757995605469, + "rewards/margins": 0.0, + "rewards/rejected": -484.9757995605469, + "step": 1180 + }, + { + "epoch": 12.431578947368422, + "grad_norm": 1.1513600384205347e-06, + "learning_rate": 0.00017530526315789476, + "logits/chosen": 13.206685066223145, + "logits/rejected": 13.206685066223145, + "logps/chosen": -3759.626953125, + "logps/rejected": -3759.626953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -373.0444030761719, + "rewards/margins": 0.0, + "rewards/rejected": -373.0444030761719, + "step": 1181 + }, + { + "epoch": 12.442105263157895, + "grad_norm": 1.6320326494678739e-06, + "learning_rate": 0.0001752842105263158, + "logits/chosen": 13.262678146362305, + "logits/rejected": 13.262678146362305, + "logps/chosen": -5175.26953125, + "logps/rejected": -5175.26953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5885620117188, + "rewards/margins": 0.0, + "rewards/rejected": -514.5885620117188, + "step": 1182 + }, + { + "epoch": 12.452631578947368, + "grad_norm": 1.380205389978073e-06, + "learning_rate": 0.00017526315789473683, + "logits/chosen": 13.19819164276123, + "logits/rejected": 13.19819164276123, + "logps/chosen": -4288.6201171875, + "logps/rejected": -4288.6201171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0646667480469, + "rewards/margins": 0.0, + "rewards/rejected": -426.0646667480469, + "step": 1183 + }, + { + "epoch": 12.463157894736842, + "grad_norm": 1.4973530824136105e-06, + "learning_rate": 0.0001752421052631579, + "logits/chosen": 13.255668640136719, + "logits/rejected": 13.255668640136719, + "logps/chosen": -5175.7021484375, + "logps/rejected": -5175.7021484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6317749023438, + "rewards/margins": 0.0, + "rewards/rejected": -514.6317749023438, + "step": 1184 + }, + { + "epoch": 12.473684210526315, + "grad_norm": 8.26027758193959e-07, + "learning_rate": 0.00017522105263157895, + "logits/chosen": 13.19247817993164, + "logits/rejected": 13.19247817993164, + "logps/chosen": -3545.4443359375, + "logps/rejected": -3545.4443359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5185546875, + "rewards/margins": 0.0, + "rewards/rejected": -351.5185546875, + "step": 1185 + }, + { + "epoch": 12.48421052631579, + "grad_norm": 8.63258264871547e-07, + "learning_rate": 0.0001752, + "logits/chosen": 13.19344711303711, + "logits/rejected": 13.19344711303711, + "logps/chosen": -3545.576171875, + "logps/rejected": -3545.576171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.53173828125, + "rewards/margins": 0.0, + "rewards/rejected": -351.53173828125, + "step": 1186 + }, + { + "epoch": 12.494736842105263, + "grad_norm": 8.619325626568752e-07, + "learning_rate": 0.00017517894736842105, + "logits/chosen": 13.19863510131836, + "logits/rejected": 13.19863510131836, + "logps/chosen": -3545.6513671875, + "logps/rejected": -3545.6513671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5392761230469, + "rewards/margins": 0.0, + "rewards/rejected": -351.5392761230469, + "step": 1187 + }, + { + "epoch": 12.505263157894737, + "grad_norm": 1.373716258967761e-06, + "learning_rate": 0.00017515789473684213, + "logits/chosen": 13.255412101745605, + "logits/rejected": 13.255412101745605, + "logps/chosen": -4879.18701171875, + "logps/rejected": -4879.18701171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.071533203125, + "rewards/margins": 0.0, + "rewards/rejected": -485.071533203125, + "step": 1188 + }, + { + "epoch": 12.51578947368421, + "grad_norm": 1.444881149836874e-06, + "learning_rate": 0.00017513684210526318, + "logits/chosen": 13.263691902160645, + "logits/rejected": 13.263691902160645, + "logps/chosen": -4879.3876953125, + "logps/rejected": -4879.3876953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.09161376953125, + "rewards/margins": 0.0, + "rewards/rejected": -485.09161376953125, + "step": 1189 + }, + { + "epoch": 12.526315789473685, + "grad_norm": 1.5663051726733102e-06, + "learning_rate": 0.0001751157894736842, + "logits/chosen": 13.208550453186035, + "logits/rejected": 13.208550453186035, + "logps/chosen": -3995.5078125, + "logps/rejected": -3995.5078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7234802246094, + "rewards/margins": 0.0, + "rewards/rejected": -396.7234802246094, + "step": 1190 + }, + { + "epoch": 12.536842105263158, + "grad_norm": 1.49425943618553e-06, + "learning_rate": 0.00017509473684210528, + "logits/chosen": 13.209768295288086, + "logits/rejected": 13.209768295288086, + "logps/chosen": -3995.65234375, + "logps/rejected": -3995.65234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7379150390625, + "rewards/margins": 0.0, + "rewards/rejected": -396.7379150390625, + "step": 1191 + }, + { + "epoch": 12.547368421052632, + "grad_norm": 1.3053418115305249e-06, + "learning_rate": 0.00017507368421052633, + "logits/chosen": 13.21781063079834, + "logits/rejected": 13.21781063079834, + "logps/chosen": -4289.62890625, + "logps/rejected": -4289.62890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.16552734375, + "rewards/margins": 0.0, + "rewards/rejected": -426.16552734375, + "step": 1192 + }, + { + "epoch": 12.557894736842105, + "grad_norm": 1.270706889044959e-06, + "learning_rate": 0.00017505263157894738, + "logits/chosen": 13.199355125427246, + "logits/rejected": 13.199355125427246, + "logps/chosen": -3995.9765625, + "logps/rejected": -3995.9765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7703552246094, + "rewards/margins": 0.0, + "rewards/rejected": -396.7703552246094, + "step": 1193 + }, + { + "epoch": 12.568421052631578, + "grad_norm": 1.5296182027668692e-06, + "learning_rate": 0.00017503157894736842, + "logits/chosen": 13.19531536102295, + "logits/rejected": 13.19531536102295, + "logps/chosen": -3777.271484375, + "logps/rejected": -3777.271484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8580322265625, + "rewards/margins": 0.0, + "rewards/rejected": -374.8580322265625, + "step": 1194 + }, + { + "epoch": 12.578947368421053, + "grad_norm": 8.08208028502122e-07, + "learning_rate": 0.0001750105263157895, + "logits/chosen": 13.193829536437988, + "logits/rejected": 13.193829536437988, + "logps/chosen": -2968.283203125, + "logps/rejected": -2968.283203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1021423339844, + "rewards/margins": 0.0, + "rewards/rejected": -294.1021423339844, + "step": 1195 + }, + { + "epoch": 12.589473684210526, + "grad_norm": 1.5503273971262388e-06, + "learning_rate": 0.00017498947368421052, + "logits/chosen": 13.250874519348145, + "logits/rejected": 13.250874519348145, + "logps/chosen": -5176.5908203125, + "logps/rejected": -5176.5908203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7206420898438, + "rewards/margins": 0.0, + "rewards/rejected": -514.7206420898438, + "step": 1196 + }, + { + "epoch": 12.6, + "grad_norm": 9.686530120234238e-07, + "learning_rate": 0.00017496842105263157, + "logits/chosen": 13.183822631835938, + "logits/rejected": 13.183822631835938, + "logps/chosen": -3544.3095703125, + "logps/rejected": -3544.3095703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.40509033203125, + "rewards/margins": 0.0, + "rewards/rejected": -351.40509033203125, + "step": 1197 + }, + { + "epoch": 12.610526315789473, + "grad_norm": 1.7548613868711982e-06, + "learning_rate": 0.00017494736842105265, + "logits/chosen": 13.23482894897461, + "logits/rejected": 13.23482894897461, + "logps/chosen": -4880.33984375, + "logps/rejected": -4880.33984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.18682861328125, + "rewards/margins": 0.0, + "rewards/rejected": -485.18682861328125, + "step": 1198 + }, + { + "epoch": 12.621052631578948, + "grad_norm": 1.1824447483377298e-06, + "learning_rate": 0.0001749263157894737, + "logits/chosen": 13.164804458618164, + "logits/rejected": 13.164804458618164, + "logps/chosen": -2672.935546875, + "logps/rejected": -2672.935546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.511962890625, + "rewards/margins": 0.0, + "rewards/rejected": -264.511962890625, + "step": 1199 + }, + { + "epoch": 12.631578947368421, + "grad_norm": 1.5380776403617347e-06, + "learning_rate": 0.00017490526315789475, + "logits/chosen": 13.178834915161133, + "logits/rejected": 13.178834915161133, + "logps/chosen": -3777.666015625, + "logps/rejected": -3777.666015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8974914550781, + "rewards/margins": 0.0, + "rewards/rejected": -374.8974914550781, + "step": 1200 + }, + { + "epoch": 12.631578947368421, + "eval_logits/chosen": 13.219343185424805, + "eval_logits/rejected": 13.219343185424805, + "eval_logps/chosen": -4310.83984375, + "eval_logps/rejected": -4310.83984375, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.18084716796875, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.18084716796875, + "eval_runtime": 4.6477, + "eval_samples_per_second": 2.152, + "eval_steps_per_second": 2.152, + "step": 1200 + }, + { + "epoch": 12.642105263157895, + "grad_norm": 1.29360205392004e-06, + "learning_rate": 0.0001748842105263158, + "logits/chosen": 13.240039825439453, + "logits/rejected": 13.240039825439453, + "logps/chosen": -4880.54296875, + "logps/rejected": -4880.54296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2071228027344, + "rewards/margins": 0.0, + "rewards/rejected": -485.2071228027344, + "step": 1201 + }, + { + "epoch": 12.652631578947368, + "grad_norm": 9.248939250028343e-07, + "learning_rate": 0.00017486315789473685, + "logits/chosen": 13.171520233154297, + "logits/rejected": 13.171520233154297, + "logps/chosen": -2672.8818359375, + "logps/rejected": -2672.8818359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.506591796875, + "rewards/margins": 0.0, + "rewards/rejected": -264.506591796875, + "step": 1202 + }, + { + "epoch": 12.663157894736843, + "grad_norm": 1.7157086631414131e-06, + "learning_rate": 0.0001748421052631579, + "logits/chosen": 13.257827758789062, + "logits/rejected": 13.257827758789062, + "logps/chosen": -5176.35205078125, + "logps/rejected": -5176.35205078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.69677734375, + "rewards/margins": 0.0, + "rewards/rejected": -514.69677734375, + "step": 1203 + }, + { + "epoch": 12.673684210526316, + "grad_norm": 1.2350144515949069e-06, + "learning_rate": 0.00017482105263157894, + "logits/chosen": 13.182551383972168, + "logits/rejected": 13.182551383972168, + "logps/chosen": -3997.763671875, + "logps/rejected": -3997.763671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9490661621094, + "rewards/margins": 0.0, + "rewards/rejected": -396.9490661621094, + "step": 1204 + }, + { + "epoch": 12.68421052631579, + "grad_norm": 1.0536100489844102e-06, + "learning_rate": 0.00017480000000000002, + "logits/chosen": 13.194211959838867, + "logits/rejected": 13.194211959838867, + "logps/chosen": -3758.677734375, + "logps/rejected": -3758.677734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.949462890625, + "rewards/margins": 0.0, + "rewards/rejected": -372.949462890625, + "step": 1205 + }, + { + "epoch": 12.694736842105263, + "grad_norm": 1.496665959166421e-06, + "learning_rate": 0.00017477894736842107, + "logits/chosen": 13.176555633544922, + "logits/rejected": 13.176555633544922, + "logps/chosen": -3997.82421875, + "logps/rejected": -3997.82421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9551086425781, + "rewards/margins": 0.0, + "rewards/rejected": -396.9551086425781, + "step": 1206 + }, + { + "epoch": 12.705263157894738, + "grad_norm": 1.4589624015570735e-06, + "learning_rate": 0.00017475789473684212, + "logits/chosen": 13.23509693145752, + "logits/rejected": 13.23509693145752, + "logps/chosen": -4881.1796875, + "logps/rejected": -4881.1796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.27081298828125, + "rewards/margins": 0.0, + "rewards/rejected": -485.27081298828125, + "step": 1207 + }, + { + "epoch": 12.715789473684211, + "grad_norm": 1.5126262269404833e-06, + "learning_rate": 0.00017473684210526317, + "logits/chosen": 13.1687593460083, + "logits/rejected": 13.1687593460083, + "logps/chosen": -3778.708984375, + "logps/rejected": -3778.708984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.0018005371094, + "rewards/margins": 0.0, + "rewards/rejected": -375.0018005371094, + "step": 1208 + }, + { + "epoch": 12.726315789473684, + "grad_norm": 1.347111037830473e-06, + "learning_rate": 0.00017471578947368422, + "logits/chosen": 13.205496788024902, + "logits/rejected": 13.205496788024902, + "logps/chosen": -4324.451171875, + "logps/rejected": -4324.451171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.26025390625, + "rewards/margins": 0.0, + "rewards/rejected": -429.26025390625, + "step": 1209 + }, + { + "epoch": 12.736842105263158, + "grad_norm": 9.442120472158422e-07, + "learning_rate": 0.00017469473684210527, + "logits/chosen": 13.167729377746582, + "logits/rejected": 13.167729377746582, + "logps/chosen": -3542.4921875, + "logps/rejected": -3542.4921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2233581542969, + "rewards/margins": 0.0, + "rewards/rejected": -351.2233581542969, + "step": 1210 + }, + { + "epoch": 12.74736842105263, + "grad_norm": 1.1260599421802908e-06, + "learning_rate": 0.00017467368421052632, + "logits/chosen": 13.153641700744629, + "logits/rejected": 13.153641700744629, + "logps/chosen": -3998.673828125, + "logps/rejected": -3998.673828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0400695800781, + "rewards/margins": 0.0, + "rewards/rejected": -397.0400695800781, + "step": 1211 + }, + { + "epoch": 12.757894736842106, + "grad_norm": 1.48184381032479e-06, + "learning_rate": 0.00017465263157894737, + "logits/chosen": 13.15683650970459, + "logits/rejected": 13.15683650970459, + "logps/chosen": -3779.37109375, + "logps/rejected": -3779.37109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.0679931640625, + "rewards/margins": 0.0, + "rewards/rejected": -375.0679931640625, + "step": 1212 + }, + { + "epoch": 12.76842105263158, + "grad_norm": 1.5160769635258475e-06, + "learning_rate": 0.00017463157894736844, + "logits/chosen": 13.227375030517578, + "logits/rejected": 13.227375030517578, + "logps/chosen": -5175.3994140625, + "logps/rejected": -5175.3994140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6015014648438, + "rewards/margins": 0.0, + "rewards/rejected": -514.6015014648438, + "step": 1213 + }, + { + "epoch": 12.778947368421052, + "grad_norm": 1.5666516901546856e-06, + "learning_rate": 0.0001746105263157895, + "logits/chosen": 13.21477222442627, + "logits/rejected": 13.21477222442627, + "logps/chosen": -4881.1376953125, + "logps/rejected": -4881.1376953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2666015625, + "rewards/margins": 0.0, + "rewards/rejected": -485.2666015625, + "step": 1214 + }, + { + "epoch": 12.789473684210526, + "grad_norm": 1.4243105397326872e-06, + "learning_rate": 0.0001745894736842105, + "logits/chosen": 13.14836311340332, + "logits/rejected": 13.14836311340332, + "logps/chosen": -3999.669921875, + "logps/rejected": -3999.669921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1396789550781, + "rewards/margins": 0.0, + "rewards/rejected": -397.1396789550781, + "step": 1215 + }, + { + "epoch": 12.8, + "grad_norm": 1.4788988664804492e-06, + "learning_rate": 0.0001745684210526316, + "logits/chosen": 13.213777542114258, + "logits/rejected": 13.213777542114258, + "logps/chosen": -4881.03466796875, + "logps/rejected": -4881.03466796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2563171386719, + "rewards/margins": 0.0, + "rewards/rejected": -485.2563171386719, + "step": 1216 + }, + { + "epoch": 12.810526315789474, + "grad_norm": 1.0042375606644782e-06, + "learning_rate": 0.00017454736842105264, + "logits/chosen": 13.158173561096191, + "logits/rejected": 13.158173561096191, + "logps/chosen": -3757.86328125, + "logps/rejected": -3757.86328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8680114746094, + "rewards/margins": 0.0, + "rewards/rejected": -372.8680114746094, + "step": 1217 + }, + { + "epoch": 12.821052631578947, + "grad_norm": 1.6326533796018339e-06, + "learning_rate": 0.0001745263157894737, + "logits/chosen": 13.223214149475098, + "logits/rejected": 13.223214149475098, + "logps/chosen": -5174.9873046875, + "logps/rejected": -5174.9873046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.560302734375, + "rewards/margins": 0.0, + "rewards/rejected": -514.560302734375, + "step": 1218 + }, + { + "epoch": 12.83157894736842, + "grad_norm": 1.5520402030233527e-06, + "learning_rate": 0.00017450526315789474, + "logits/chosen": 13.223483085632324, + "logits/rejected": 13.223483085632324, + "logps/chosen": -5175.1748046875, + "logps/rejected": -5175.1748046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5790405273438, + "rewards/margins": 0.0, + "rewards/rejected": -514.5790405273438, + "step": 1219 + }, + { + "epoch": 12.842105263157894, + "grad_norm": 1.0565128150119563e-06, + "learning_rate": 0.00017448421052631581, + "logits/chosen": 13.136785507202148, + "logits/rejected": 13.136785507202148, + "logps/chosen": -2672.26171875, + "logps/rejected": -2672.26171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.444580078125, + "rewards/margins": 0.0, + "rewards/rejected": -264.444580078125, + "step": 1220 + }, + { + "epoch": 12.852631578947369, + "grad_norm": 1.404454678777256e-06, + "learning_rate": 0.00017446315789473684, + "logits/chosen": 13.16103744506836, + "logits/rejected": 13.16103744506836, + "logps/chosen": -4286.90625, + "logps/rejected": -4286.90625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.8932800292969, + "rewards/margins": 0.0, + "rewards/rejected": -425.8932800292969, + "step": 1221 + }, + { + "epoch": 12.863157894736842, + "grad_norm": 1.1864430007335613e-06, + "learning_rate": 0.00017444210526315789, + "logits/chosen": 13.14920425415039, + "logits/rejected": 13.14920425415039, + "logps/chosen": -3999.20703125, + "logps/rejected": -3999.20703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0933837890625, + "rewards/margins": 0.0, + "rewards/rejected": -397.0933837890625, + "step": 1222 + }, + { + "epoch": 12.873684210526315, + "grad_norm": 1.3955528856968158e-06, + "learning_rate": 0.00017442105263157896, + "logits/chosen": 13.160967826843262, + "logits/rejected": 13.160967826843262, + "logps/chosen": -4287.162109375, + "logps/rejected": -4287.162109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9188537597656, + "rewards/margins": 0.0, + "rewards/rejected": -425.9188537597656, + "step": 1223 + }, + { + "epoch": 12.884210526315789, + "grad_norm": 1.1030474524886813e-06, + "learning_rate": 0.0001744, + "logits/chosen": 13.143211364746094, + "logits/rejected": 13.143211364746094, + "logps/chosen": -3999.69140625, + "logps/rejected": -3999.69140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1418151855469, + "rewards/margins": 0.0, + "rewards/rejected": -397.1418151855469, + "step": 1224 + }, + { + "epoch": 12.894736842105264, + "grad_norm": 1.0810474577738205e-06, + "learning_rate": 0.00017437894736842106, + "logits/chosen": 13.135416030883789, + "logits/rejected": 13.135416030883789, + "logps/chosen": -4000.001953125, + "logps/rejected": -4000.001953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1728820800781, + "rewards/margins": 0.0, + "rewards/rejected": -397.1728820800781, + "step": 1225 + }, + { + "epoch": 12.905263157894737, + "grad_norm": 1.5307930425478844e-06, + "learning_rate": 0.0001743578947368421, + "logits/chosen": 13.195619583129883, + "logits/rejected": 13.195619583129883, + "logps/chosen": -4880.837890625, + "logps/rejected": -4880.837890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.23663330078125, + "rewards/margins": 0.0, + "rewards/rejected": -485.23663330078125, + "step": 1226 + }, + { + "epoch": 12.91578947368421, + "grad_norm": 1.5578589227516204e-06, + "learning_rate": 0.00017433684210526319, + "logits/chosen": 13.124464988708496, + "logits/rejected": 13.124464988708496, + "logps/chosen": -3778.41015625, + "logps/rejected": -3778.41015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9718933105469, + "rewards/margins": 0.0, + "rewards/rejected": -374.9718933105469, + "step": 1227 + }, + { + "epoch": 12.926315789473684, + "grad_norm": 1.81817688371666e-06, + "learning_rate": 0.0001743157894736842, + "logits/chosen": 13.194299697875977, + "logits/rejected": 13.194299697875977, + "logps/chosen": -5175.64404296875, + "logps/rejected": -5175.64404296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6259765625, + "rewards/margins": 0.0, + "rewards/rejected": -514.6259765625, + "step": 1228 + }, + { + "epoch": 12.936842105263159, + "grad_norm": 1.4195894664226216e-06, + "learning_rate": 0.00017429473684210526, + "logits/chosen": 13.122154235839844, + "logits/rejected": 13.122154235839844, + "logps/chosen": -4288.3291015625, + "logps/rejected": -4288.3291015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0355529785156, + "rewards/margins": 0.0, + "rewards/rejected": -426.0355529785156, + "step": 1229 + }, + { + "epoch": 12.947368421052632, + "grad_norm": 1.5928274024190614e-06, + "learning_rate": 0.00017427368421052633, + "logits/chosen": 13.157710075378418, + "logits/rejected": 13.157710075378418, + "logps/chosen": -4324.19921875, + "logps/rejected": -4324.19921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.2350769042969, + "rewards/margins": 0.0, + "rewards/rejected": -429.2350769042969, + "step": 1230 + }, + { + "epoch": 12.957894736842105, + "grad_norm": 9.173579087473627e-07, + "learning_rate": 0.00017425263157894738, + "logits/chosen": 13.101690292358398, + "logits/rejected": 13.101690292358398, + "logps/chosen": -2672.404296875, + "logps/rejected": -2672.404296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4588317871094, + "rewards/margins": 0.0, + "rewards/rejected": -264.4588317871094, + "step": 1231 + }, + { + "epoch": 12.968421052631578, + "grad_norm": 9.18890009415918e-07, + "learning_rate": 0.00017423157894736843, + "logits/chosen": 13.104532241821289, + "logits/rejected": 13.104532241821289, + "logps/chosen": -2672.669921875, + "logps/rejected": -2672.669921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.48541259765625, + "rewards/margins": 0.0, + "rewards/rejected": -264.48541259765625, + "step": 1232 + }, + { + "epoch": 12.978947368421053, + "grad_norm": 8.989978823592537e-07, + "learning_rate": 0.00017421052631578948, + "logits/chosen": 13.12751293182373, + "logits/rejected": 13.12751293182373, + "logps/chosen": -2966.474609375, + "logps/rejected": -2966.474609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9212646484375, + "rewards/margins": 0.0, + "rewards/rejected": -293.9212646484375, + "step": 1233 + }, + { + "epoch": 12.989473684210527, + "grad_norm": 4.231207185512176e-06, + "learning_rate": 0.00017418947368421053, + "logits/chosen": 13.189374923706055, + "logits/rejected": 13.189374923706055, + "logps/chosen": -4880.4609375, + "logps/rejected": -4880.4609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1989440917969, + "rewards/margins": 0.0, + "rewards/rejected": -485.1989440917969, + "step": 1234 + }, + { + "epoch": 13.0, + "grad_norm": 1.278900299439556e-06, + "learning_rate": 0.00017416842105263158, + "logits/chosen": 13.141138076782227, + "logits/rejected": 13.141138076782227, + "logps/chosen": -3540.908203125, + "logps/rejected": -3540.908203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.06494140625, + "rewards/margins": 0.0, + "rewards/rejected": -351.06494140625, + "step": 1235 + }, + { + "epoch": 13.010526315789473, + "grad_norm": 4.298643034417182e-06, + "learning_rate": 0.00017414736842105263, + "logits/chosen": 13.135010719299316, + "logits/rejected": 13.135010719299316, + "logps/chosen": -3999.578125, + "logps/rejected": -3999.578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1304931640625, + "rewards/margins": 0.0, + "rewards/rejected": -397.1304931640625, + "step": 1236 + }, + { + "epoch": 13.021052631578947, + "grad_norm": 1.3581113762484165e-06, + "learning_rate": 0.0001741263157894737, + "logits/chosen": 13.157971382141113, + "logits/rejected": 13.157971382141113, + "logps/chosen": -3758.025390625, + "logps/rejected": -3758.025390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8842468261719, + "rewards/margins": 0.0, + "rewards/rejected": -372.8842468261719, + "step": 1237 + }, + { + "epoch": 13.031578947368422, + "grad_norm": 8.696854933987197e-07, + "learning_rate": 0.00017410526315789475, + "logits/chosen": 13.139606475830078, + "logits/rejected": 13.139606475830078, + "logps/chosen": -2673.958984375, + "logps/rejected": -2673.958984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.61431884765625, + "rewards/margins": 0.0, + "rewards/rejected": -264.61431884765625, + "step": 1238 + }, + { + "epoch": 13.042105263157895, + "grad_norm": 5.090080321679125e-06, + "learning_rate": 0.0001740842105263158, + "logits/chosen": 13.165019989013672, + "logits/rejected": 13.165019989013672, + "logps/chosen": -4289.61669921875, + "logps/rejected": -4289.61669921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.164306640625, + "rewards/margins": 0.0, + "rewards/rejected": -426.164306640625, + "step": 1239 + }, + { + "epoch": 13.052631578947368, + "grad_norm": 1.8682176232687198e-06, + "learning_rate": 0.00017406315789473685, + "logits/chosen": 13.158531188964844, + "logits/rejected": 13.158531188964844, + "logps/chosen": -3778.431640625, + "logps/rejected": -3778.431640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.97406005859375, + "rewards/margins": 0.0, + "rewards/rejected": -374.97406005859375, + "step": 1240 + }, + { + "epoch": 13.063157894736841, + "grad_norm": 2.378750195930479e-06, + "learning_rate": 0.0001740421052631579, + "logits/chosen": 13.16057014465332, + "logits/rejected": 13.16057014465332, + "logps/chosen": -2967.625, + "logps/rejected": -2967.625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.03631591796875, + "rewards/margins": 0.0, + "rewards/rejected": -294.03631591796875, + "step": 1241 + }, + { + "epoch": 13.073684210526316, + "grad_norm": 2.5235326575057115e-06, + "learning_rate": 0.00017402105263157895, + "logits/chosen": 13.167869567871094, + "logits/rejected": 13.167869567871094, + "logps/chosen": -3758.5048828125, + "logps/rejected": -3758.5048828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.93218994140625, + "rewards/margins": 0.0, + "rewards/rejected": -372.93218994140625, + "step": 1242 + }, + { + "epoch": 13.08421052631579, + "grad_norm": 1.0824261380548705e-06, + "learning_rate": 0.000174, + "logits/chosen": 13.170663833618164, + "logits/rejected": 13.170663833618164, + "logps/chosen": -3541.8671875, + "logps/rejected": -3541.8671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1608581542969, + "rewards/margins": 0.0, + "rewards/rejected": -351.1608581542969, + "step": 1243 + }, + { + "epoch": 13.094736842105263, + "grad_norm": 1.986631787076476e-06, + "learning_rate": 0.00017397894736842105, + "logits/chosen": 13.174274444580078, + "logits/rejected": 13.174274444580078, + "logps/chosen": -4291.10791015625, + "logps/rejected": -4291.10791015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.3134460449219, + "rewards/margins": 0.0, + "rewards/rejected": -426.3134460449219, + "step": 1244 + }, + { + "epoch": 13.105263157894736, + "grad_norm": 9.613373777028755e-07, + "learning_rate": 0.00017395789473684213, + "logits/chosen": 13.155138969421387, + "logits/rejected": 13.155138969421387, + "logps/chosen": -2675.404296875, + "logps/rejected": -2675.404296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.75885009765625, + "rewards/margins": 0.0, + "rewards/rejected": -264.75885009765625, + "step": 1245 + }, + { + "epoch": 13.115789473684211, + "grad_norm": 2.8787717383238487e-06, + "learning_rate": 0.00017393684210526318, + "logits/chosen": 13.171771049499512, + "logits/rejected": 13.171771049499512, + "logps/chosen": -3779.2490234375, + "logps/rejected": -3779.2490234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.0557861328125, + "rewards/margins": 0.0, + "rewards/rejected": -375.0557861328125, + "step": 1246 + }, + { + "epoch": 13.126315789473685, + "grad_norm": 9.68525910138851e-07, + "learning_rate": 0.0001739157894736842, + "logits/chosen": 13.179234504699707, + "logits/rejected": 13.179234504699707, + "logps/chosen": -3542.28515625, + "logps/rejected": -3542.28515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.20263671875, + "rewards/margins": 0.0, + "rewards/rejected": -351.20263671875, + "step": 1247 + }, + { + "epoch": 13.136842105263158, + "grad_norm": 2.1995188035361934e-06, + "learning_rate": 0.00017389473684210527, + "logits/chosen": 13.164140701293945, + "logits/rejected": 13.164140701293945, + "logps/chosen": -3998.693359375, + "logps/rejected": -3998.693359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0420227050781, + "rewards/margins": 0.0, + "rewards/rejected": -397.0420227050781, + "step": 1248 + }, + { + "epoch": 13.147368421052631, + "grad_norm": 2.7806797788798576e-06, + "learning_rate": 0.00017387368421052632, + "logits/chosen": 13.156556129455566, + "logits/rejected": 13.156556129455566, + "logps/chosen": -3998.7265625, + "logps/rejected": -3998.7265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.04534912109375, + "rewards/margins": 0.0, + "rewards/rejected": -397.04534912109375, + "step": 1249 + }, + { + "epoch": 13.157894736842104, + "grad_norm": 1.0588154282231699e-06, + "learning_rate": 0.00017385263157894737, + "logits/chosen": 13.135560989379883, + "logits/rejected": 13.135560989379883, + "logps/chosen": -2676.119140625, + "logps/rejected": -2676.119140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.830322265625, + "rewards/margins": 0.0, + "rewards/rejected": -264.830322265625, + "step": 1250 + }, + { + "epoch": 13.157894736842104, + "eval_logits/chosen": 13.177592277526855, + "eval_logits/rejected": 13.177592277526855, + "eval_logps/chosen": -4309.9306640625, + "eval_logps/rejected": -4309.9306640625, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.0899353027344, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.0899353027344, + "eval_runtime": 4.3262, + "eval_samples_per_second": 2.311, + "eval_steps_per_second": 2.311, + "step": 1250 + }, + { + "epoch": 13.16842105263158, + "grad_norm": 1.6972080629784614e-06, + "learning_rate": 0.00017383157894736842, + "logits/chosen": 13.1998872756958, + "logits/rejected": 13.1998872756958, + "logps/chosen": -4878.42431640625, + "logps/rejected": -4878.42431640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9952697753906, + "rewards/margins": 0.0, + "rewards/rejected": -484.9952697753906, + "step": 1251 + }, + { + "epoch": 13.178947368421053, + "grad_norm": 2.2566489406017354e-06, + "learning_rate": 0.0001738105263157895, + "logits/chosen": 13.137022018432617, + "logits/rejected": 13.137022018432617, + "logps/chosen": -3542.4072265625, + "logps/rejected": -3542.4072265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.21484375, + "rewards/margins": 0.0, + "rewards/rejected": -351.21484375, + "step": 1252 + }, + { + "epoch": 13.189473684210526, + "grad_norm": 2.3852032882132335e-06, + "learning_rate": 0.00017378947368421052, + "logits/chosen": 13.186307907104492, + "logits/rejected": 13.186307907104492, + "logps/chosen": -4878.13134765625, + "logps/rejected": -4878.13134765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9659729003906, + "rewards/margins": 0.0, + "rewards/rejected": -484.9659729003906, + "step": 1253 + }, + { + "epoch": 13.2, + "grad_norm": 1.001463942884584e-06, + "learning_rate": 0.00017376842105263157, + "logits/chosen": 13.128952026367188, + "logits/rejected": 13.128952026367188, + "logps/chosen": -2969.6259765625, + "logps/rejected": -2969.6259765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.2364196777344, + "rewards/margins": 0.0, + "rewards/rejected": -294.2364196777344, + "step": 1254 + }, + { + "epoch": 13.210526315789474, + "grad_norm": 2.3843738290452166e-06, + "learning_rate": 0.00017374736842105265, + "logits/chosen": 13.166271209716797, + "logits/rejected": 13.166271209716797, + "logps/chosen": -4325.814453125, + "logps/rejected": -4325.814453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.3965759277344, + "rewards/margins": 0.0, + "rewards/rejected": -429.3965759277344, + "step": 1255 + }, + { + "epoch": 13.221052631578948, + "grad_norm": 1.1960969459323678e-06, + "learning_rate": 0.0001737263157894737, + "logits/chosen": 13.117654800415039, + "logits/rejected": 13.117654800415039, + "logps/chosen": -3998.9453125, + "logps/rejected": -3998.9453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0672302246094, + "rewards/margins": 0.0, + "rewards/rejected": -397.0672302246094, + "step": 1256 + }, + { + "epoch": 13.23157894736842, + "grad_norm": 2.0701395442301873e-06, + "learning_rate": 0.00017370526315789474, + "logits/chosen": 13.196760177612305, + "logits/rejected": 13.196760177612305, + "logps/chosen": -5171.2568359375, + "logps/rejected": -5171.2568359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.187255859375, + "rewards/margins": 0.0, + "rewards/rejected": -514.187255859375, + "step": 1257 + }, + { + "epoch": 13.242105263157894, + "grad_norm": 2.019994553847937e-06, + "learning_rate": 0.0001736842105263158, + "logits/chosen": 13.11418342590332, + "logits/rejected": 13.11418342590332, + "logps/chosen": -3999.14453125, + "logps/rejected": -3999.14453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0871276855469, + "rewards/margins": 0.0, + "rewards/rejected": -397.0871276855469, + "step": 1258 + }, + { + "epoch": 13.25263157894737, + "grad_norm": 1.033930630001123e-06, + "learning_rate": 0.00017366315789473687, + "logits/chosen": 13.123323440551758, + "logits/rejected": 13.123323440551758, + "logps/chosen": -3542.9091796875, + "logps/rejected": -3542.9091796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2650451660156, + "rewards/margins": 0.0, + "rewards/rejected": -351.2650451660156, + "step": 1259 + }, + { + "epoch": 13.263157894736842, + "grad_norm": 9.739521829033038e-07, + "learning_rate": 0.0001736421052631579, + "logits/chosen": 13.119747161865234, + "logits/rejected": 13.119747161865234, + "logps/chosen": -3543.3935546875, + "logps/rejected": -3543.3935546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3134765625, + "rewards/margins": 0.0, + "rewards/rejected": -351.3134765625, + "step": 1260 + }, + { + "epoch": 13.273684210526316, + "grad_norm": 2.752431100816466e-06, + "learning_rate": 0.00017362105263157894, + "logits/chosen": 13.173813819885254, + "logits/rejected": 13.173813819885254, + "logps/chosen": -4878.8154296875, + "logps/rejected": -4878.8154296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0343933105469, + "rewards/margins": 0.0, + "rewards/rejected": -485.0343933105469, + "step": 1261 + }, + { + "epoch": 13.284210526315789, + "grad_norm": 1.5259577139659086e-06, + "learning_rate": 0.00017360000000000002, + "logits/chosen": 13.176005363464355, + "logits/rejected": 13.176005363464355, + "logps/chosen": -4879.44189453125, + "logps/rejected": -4879.44189453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0970153808594, + "rewards/margins": 0.0, + "rewards/rejected": -485.0970153808594, + "step": 1262 + }, + { + "epoch": 13.294736842105262, + "grad_norm": 1.538273522783129e-06, + "learning_rate": 0.00017357894736842107, + "logits/chosen": 13.179133415222168, + "logits/rejected": 13.179133415222168, + "logps/chosen": -4879.802734375, + "logps/rejected": -4879.802734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.13311767578125, + "rewards/margins": 0.0, + "rewards/rejected": -485.13311767578125, + "step": 1263 + }, + { + "epoch": 13.305263157894737, + "grad_norm": 1.694831439635891e-06, + "learning_rate": 0.00017355789473684212, + "logits/chosen": 13.132376670837402, + "logits/rejected": 13.132376670837402, + "logps/chosen": -3543.6337890625, + "logps/rejected": -3543.6337890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3374938964844, + "rewards/margins": 0.0, + "rewards/rejected": -351.3374938964844, + "step": 1264 + }, + { + "epoch": 13.31578947368421, + "grad_norm": 1.55275188262749e-06, + "learning_rate": 0.00017353684210526317, + "logits/chosen": 13.198293685913086, + "logits/rejected": 13.198293685913086, + "logps/chosen": -4880.35546875, + "logps/rejected": -4880.35546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1883850097656, + "rewards/margins": 0.0, + "rewards/rejected": -485.1883850097656, + "step": 1265 + }, + { + "epoch": 13.326315789473684, + "grad_norm": 1.3666732456840691e-06, + "learning_rate": 0.00017351578947368422, + "logits/chosen": 13.14201545715332, + "logits/rejected": 13.14201545715332, + "logps/chosen": -3999.75, + "logps/rejected": -3999.75, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1476745605469, + "rewards/margins": 0.0, + "rewards/rejected": -397.1476745605469, + "step": 1266 + }, + { + "epoch": 13.336842105263157, + "grad_norm": 1.4160054888634477e-06, + "learning_rate": 0.00017349473684210526, + "logits/chosen": 13.16655445098877, + "logits/rejected": 13.16655445098877, + "logps/chosen": -3543.83984375, + "logps/rejected": -3543.83984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3581237792969, + "rewards/margins": 0.0, + "rewards/rejected": -351.3581237792969, + "step": 1267 + }, + { + "epoch": 13.347368421052632, + "grad_norm": 1.955471134351683e-06, + "learning_rate": 0.00017347368421052631, + "logits/chosen": 13.16107177734375, + "logits/rejected": 13.16107177734375, + "logps/chosen": -3999.6640625, + "logps/rejected": -3999.6640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.13909912109375, + "rewards/margins": 0.0, + "rewards/rejected": -397.13909912109375, + "step": 1268 + }, + { + "epoch": 13.357894736842105, + "grad_norm": 1.1480233297334053e-06, + "learning_rate": 0.0001734526315789474, + "logits/chosen": 13.156723976135254, + "logits/rejected": 13.156723976135254, + "logps/chosen": -2673.8017578125, + "logps/rejected": -2673.8017578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5986022949219, + "rewards/margins": 0.0, + "rewards/rejected": -264.5986022949219, + "step": 1269 + }, + { + "epoch": 13.368421052631579, + "grad_norm": 9.004695016301412e-07, + "learning_rate": 0.00017343157894736844, + "logits/chosen": 13.160689353942871, + "logits/rejected": 13.160689353942871, + "logps/chosen": -2673.66796875, + "logps/rejected": -2673.66796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.585205078125, + "rewards/margins": 0.0, + "rewards/rejected": -264.585205078125, + "step": 1270 + }, + { + "epoch": 13.378947368421052, + "grad_norm": 2.334185182917281e-06, + "learning_rate": 0.0001734105263157895, + "logits/chosen": 13.22353744506836, + "logits/rejected": 13.22353744506836, + "logps/chosen": -4325.568359375, + "logps/rejected": -4325.568359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.3719787597656, + "rewards/margins": 0.0, + "rewards/rejected": -429.3719787597656, + "step": 1271 + }, + { + "epoch": 13.389473684210527, + "grad_norm": 2.050010152743198e-06, + "learning_rate": 0.0001733894736842105, + "logits/chosen": 13.185863494873047, + "logits/rejected": 13.185863494873047, + "logps/chosen": -4287.62451171875, + "logps/rejected": -4287.62451171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.965087890625, + "rewards/margins": 0.0, + "rewards/rejected": -425.965087890625, + "step": 1272 + }, + { + "epoch": 13.4, + "grad_norm": 1.47646130699286e-06, + "learning_rate": 0.0001733684210526316, + "logits/chosen": 13.224810600280762, + "logits/rejected": 13.224810600280762, + "logps/chosen": -4325.912109375, + "logps/rejected": -4325.912109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4063415527344, + "rewards/margins": 0.0, + "rewards/rejected": -429.4063415527344, + "step": 1273 + }, + { + "epoch": 13.410526315789474, + "grad_norm": 1.1658304401862551e-06, + "learning_rate": 0.00017334736842105264, + "logits/chosen": 13.187033653259277, + "logits/rejected": 13.187033653259277, + "logps/chosen": -2968.05859375, + "logps/rejected": -2968.05859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0796813964844, + "rewards/margins": 0.0, + "rewards/rejected": -294.0796813964844, + "step": 1274 + }, + { + "epoch": 13.421052631578947, + "grad_norm": 2.672120899660513e-06, + "learning_rate": 0.00017332631578947369, + "logits/chosen": 13.256819725036621, + "logits/rejected": 13.256819725036621, + "logps/chosen": -5173.73828125, + "logps/rejected": -5173.73828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4354248046875, + "rewards/margins": 0.0, + "rewards/rejected": -514.4354248046875, + "step": 1275 + }, + { + "epoch": 13.431578947368422, + "grad_norm": 1.5713484344814788e-06, + "learning_rate": 0.00017330526315789474, + "logits/chosen": 13.174192428588867, + "logits/rejected": 13.174192428588867, + "logps/chosen": -4000.41015625, + "logps/rejected": -4000.41015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.2137145996094, + "rewards/margins": 0.0, + "rewards/rejected": -397.2137145996094, + "step": 1276 + }, + { + "epoch": 13.442105263157895, + "grad_norm": 1.8625045186126954e-06, + "learning_rate": 0.0001732842105263158, + "logits/chosen": 13.251129150390625, + "logits/rejected": 13.251129150390625, + "logps/chosen": -5174.11474609375, + "logps/rejected": -5174.11474609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4730834960938, + "rewards/margins": 0.0, + "rewards/rejected": -514.4730834960938, + "step": 1277 + }, + { + "epoch": 13.452631578947368, + "grad_norm": 1.4260409670896479e-06, + "learning_rate": 0.00017326315789473686, + "logits/chosen": 13.181211471557617, + "logits/rejected": 13.181211471557617, + "logps/chosen": -3757.837890625, + "logps/rejected": -3757.837890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.865478515625, + "rewards/margins": 0.0, + "rewards/rejected": -372.865478515625, + "step": 1278 + }, + { + "epoch": 13.463157894736842, + "grad_norm": 1.514201471763954e-06, + "learning_rate": 0.00017324210526315788, + "logits/chosen": 13.172541618347168, + "logits/rejected": 13.172541618347168, + "logps/chosen": -3777.443359375, + "logps/rejected": -3777.443359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8752136230469, + "rewards/margins": 0.0, + "rewards/rejected": -374.8752136230469, + "step": 1279 + }, + { + "epoch": 13.473684210526315, + "grad_norm": 1.5647108284611022e-06, + "learning_rate": 0.00017322105263157896, + "logits/chosen": 13.17613410949707, + "logits/rejected": 13.17613410949707, + "logps/chosen": -4287.66015625, + "logps/rejected": -4287.66015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9686584472656, + "rewards/margins": 0.0, + "rewards/rejected": -425.9686584472656, + "step": 1280 + }, + { + "epoch": 13.48421052631579, + "grad_norm": 2.22114476855495e-06, + "learning_rate": 0.0001732, + "logits/chosen": 13.242491722106934, + "logits/rejected": 13.242491722106934, + "logps/chosen": -5175.3720703125, + "logps/rejected": -5175.3720703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5988159179688, + "rewards/margins": 0.0, + "rewards/rejected": -514.5988159179688, + "step": 1281 + }, + { + "epoch": 13.494736842105263, + "grad_norm": 1.0163159913645359e-06, + "learning_rate": 0.00017317894736842106, + "logits/chosen": 13.173091888427734, + "logits/rejected": 13.173091888427734, + "logps/chosen": -3758.30859375, + "logps/rejected": -3758.30859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9125671386719, + "rewards/margins": 0.0, + "rewards/rejected": -372.9125671386719, + "step": 1282 + }, + { + "epoch": 13.505263157894737, + "grad_norm": 9.761752153281122e-07, + "learning_rate": 0.0001731578947368421, + "logits/chosen": 13.173741340637207, + "logits/rejected": 13.173741340637207, + "logps/chosen": -3544.259765625, + "logps/rejected": -3544.259765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4001159667969, + "rewards/margins": 0.0, + "rewards/rejected": -351.4001159667969, + "step": 1283 + }, + { + "epoch": 13.51578947368421, + "grad_norm": 1.6959684216999449e-06, + "learning_rate": 0.00017313684210526318, + "logits/chosen": 13.15912914276123, + "logits/rejected": 13.15912914276123, + "logps/chosen": -3999.373046875, + "logps/rejected": -3999.373046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1099853515625, + "rewards/margins": 0.0, + "rewards/rejected": -397.1099853515625, + "step": 1284 + }, + { + "epoch": 13.526315789473685, + "grad_norm": 1.46214756568952e-06, + "learning_rate": 0.0001731157894736842, + "logits/chosen": 13.164461135864258, + "logits/rejected": 13.164461135864258, + "logps/chosen": -3777.279296875, + "logps/rejected": -3777.279296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.85882568359375, + "rewards/margins": 0.0, + "rewards/rejected": -374.85882568359375, + "step": 1285 + }, + { + "epoch": 13.536842105263158, + "grad_norm": 1.5509444892813917e-06, + "learning_rate": 0.00017309473684210526, + "logits/chosen": 13.238690376281738, + "logits/rejected": 13.238690376281738, + "logps/chosen": -5176.623046875, + "logps/rejected": -5176.623046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.723876953125, + "rewards/margins": 0.0, + "rewards/rejected": -514.723876953125, + "step": 1286 + }, + { + "epoch": 13.547368421052632, + "grad_norm": 1.7104727021433064e-06, + "learning_rate": 0.00017307368421052633, + "logits/chosen": 13.226909637451172, + "logits/rejected": 13.226909637451172, + "logps/chosen": -4879.6728515625, + "logps/rejected": -4879.6728515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1201171875, + "rewards/margins": 0.0, + "rewards/rejected": -485.1201171875, + "step": 1287 + }, + { + "epoch": 13.557894736842105, + "grad_norm": 1.0637072591634933e-06, + "learning_rate": 0.00017305263157894738, + "logits/chosen": 13.172325134277344, + "logits/rejected": 13.172325134277344, + "logps/chosen": -2968.44140625, + "logps/rejected": -2968.44140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1179504394531, + "rewards/margins": 0.0, + "rewards/rejected": -294.1179504394531, + "step": 1288 + }, + { + "epoch": 13.568421052631578, + "grad_norm": 1.2580004522533272e-06, + "learning_rate": 0.00017303157894736843, + "logits/chosen": 13.232382774353027, + "logits/rejected": 13.232382774353027, + "logps/chosen": -4879.8671875, + "logps/rejected": -4879.8671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1395568847656, + "rewards/margins": 0.0, + "rewards/rejected": -485.1395568847656, + "step": 1289 + }, + { + "epoch": 13.578947368421053, + "grad_norm": 9.166489007839118e-07, + "learning_rate": 0.00017301052631578948, + "logits/chosen": 13.154160499572754, + "logits/rejected": 13.154160499572754, + "logps/chosen": -2673.2763671875, + "logps/rejected": -2673.2763671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5460510253906, + "rewards/margins": 0.0, + "rewards/rejected": -264.5460510253906, + "step": 1290 + }, + { + "epoch": 13.589473684210526, + "grad_norm": 2.135568138328381e-06, + "learning_rate": 0.00017298947368421056, + "logits/chosen": 13.238767623901367, + "logits/rejected": 13.238767623901367, + "logps/chosen": -4880.556640625, + "logps/rejected": -4880.556640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.20849609375, + "rewards/margins": 0.0, + "rewards/rejected": -485.20849609375, + "step": 1291 + }, + { + "epoch": 13.6, + "grad_norm": 1.4988737575549749e-06, + "learning_rate": 0.00017296842105263158, + "logits/chosen": 13.18018627166748, + "logits/rejected": 13.18018627166748, + "logps/chosen": -3778.162109375, + "logps/rejected": -3778.162109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9471130371094, + "rewards/margins": 0.0, + "rewards/rejected": -374.9471130371094, + "step": 1292 + }, + { + "epoch": 13.610526315789473, + "grad_norm": 1.0861937198569649e-06, + "learning_rate": 0.00017294736842105263, + "logits/chosen": 13.191993713378906, + "logits/rejected": 13.191993713378906, + "logps/chosen": -3759.056640625, + "logps/rejected": -3759.056640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.98736572265625, + "rewards/margins": 0.0, + "rewards/rejected": -372.98736572265625, + "step": 1293 + }, + { + "epoch": 13.621052631578948, + "grad_norm": 1.4736940556758782e-06, + "learning_rate": 0.0001729263157894737, + "logits/chosen": 13.25374984741211, + "logits/rejected": 13.25374984741211, + "logps/chosen": -4880.7998046875, + "logps/rejected": -4880.7998046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2328186035156, + "rewards/margins": 0.0, + "rewards/rejected": -485.2328186035156, + "step": 1294 + }, + { + "epoch": 13.631578947368421, + "grad_norm": 1.1962823691646918e-06, + "learning_rate": 0.00017290526315789475, + "logits/chosen": 13.172516822814941, + "logits/rejected": 13.172516822814941, + "logps/chosen": -2673.259765625, + "logps/rejected": -2673.259765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5444030761719, + "rewards/margins": 0.0, + "rewards/rejected": -264.5444030761719, + "step": 1295 + }, + { + "epoch": 13.642105263157895, + "grad_norm": 8.691646371516981e-07, + "learning_rate": 0.0001728842105263158, + "logits/chosen": 13.202339172363281, + "logits/rejected": 13.202339172363281, + "logps/chosen": -2968.2421875, + "logps/rejected": -2968.2421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0980224609375, + "rewards/margins": 0.0, + "rewards/rejected": -294.0980224609375, + "step": 1296 + }, + { + "epoch": 13.652631578947368, + "grad_norm": 1.582819322720752e-06, + "learning_rate": 0.00017286315789473685, + "logits/chosen": 13.187515258789062, + "logits/rejected": 13.187515258789062, + "logps/chosen": -3997.517578125, + "logps/rejected": -3997.517578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9244384765625, + "rewards/margins": 0.0, + "rewards/rejected": -396.9244384765625, + "step": 1297 + }, + { + "epoch": 13.663157894736843, + "grad_norm": 3.0316725769807817e-06, + "learning_rate": 0.0001728421052631579, + "logits/chosen": 13.27031135559082, + "logits/rejected": 13.27031135559082, + "logps/chosen": -5176.4931640625, + "logps/rejected": -5176.4931640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7108764648438, + "rewards/margins": 0.0, + "rewards/rejected": -514.7108764648438, + "step": 1298 + }, + { + "epoch": 13.673684210526316, + "grad_norm": 1.8782495772029506e-06, + "learning_rate": 0.00017282105263157895, + "logits/chosen": 13.197049140930176, + "logits/rejected": 13.197049140930176, + "logps/chosen": -4289.2333984375, + "logps/rejected": -4289.2333984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.1259765625, + "rewards/margins": 0.0, + "rewards/rejected": -426.1259765625, + "step": 1299 + }, + { + "epoch": 13.68421052631579, + "grad_norm": 1.786511802492896e-06, + "learning_rate": 0.0001728, + "logits/chosen": 13.179607391357422, + "logits/rejected": 13.179607391357422, + "logps/chosen": -3997.408203125, + "logps/rejected": -3997.408203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.91351318359375, + "rewards/margins": 0.0, + "rewards/rejected": -396.91351318359375, + "step": 1300 + }, + { + "epoch": 13.68421052631579, + "eval_logits/chosen": 13.220492362976074, + "eval_logits/rejected": 13.220492362976074, + "eval_logps/chosen": -4311.43017578125, + "eval_logps/rejected": -4311.43017578125, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.2398376464844, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.2398376464844, + "eval_runtime": 4.4434, + "eval_samples_per_second": 2.251, + "eval_steps_per_second": 2.251, + "step": 1300 + }, + { + "epoch": 13.694736842105263, + "grad_norm": 1.7261189668715815e-06, + "learning_rate": 0.00017277894736842108, + "logits/chosen": 13.256749153137207, + "logits/rejected": 13.256749153137207, + "logps/chosen": -5176.52001953125, + "logps/rejected": -5176.52001953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7135620117188, + "rewards/margins": 0.0, + "rewards/rejected": -514.7135620117188, + "step": 1301 + }, + { + "epoch": 13.705263157894738, + "grad_norm": 1.0206708793703e-06, + "learning_rate": 0.00017275789473684212, + "logits/chosen": 13.154136657714844, + "logits/rejected": 13.154136657714844, + "logps/chosen": -2673.3359375, + "logps/rejected": -2673.3359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.552001953125, + "rewards/margins": 0.0, + "rewards/rejected": -264.552001953125, + "step": 1302 + }, + { + "epoch": 13.715789473684211, + "grad_norm": 1.119861849474546e-06, + "learning_rate": 0.00017273684210526317, + "logits/chosen": 13.173050880432129, + "logits/rejected": 13.173050880432129, + "logps/chosen": -3542.158203125, + "logps/rejected": -3542.158203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.18994140625, + "rewards/margins": 0.0, + "rewards/rejected": -351.18994140625, + "step": 1303 + }, + { + "epoch": 13.726315789473684, + "grad_norm": 1.6815436083561508e-06, + "learning_rate": 0.0001727157894736842, + "logits/chosen": 13.161462783813477, + "logits/rejected": 13.161462783813477, + "logps/chosen": -3779.09375, + "logps/rejected": -3779.09375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.0402526855469, + "rewards/margins": 0.0, + "rewards/rejected": -375.0402526855469, + "step": 1304 + }, + { + "epoch": 13.736842105263158, + "grad_norm": 1.0372693850513315e-06, + "learning_rate": 0.00017269473684210527, + "logits/chosen": 13.166621208190918, + "logits/rejected": 13.166621208190918, + "logps/chosen": -3542.416015625, + "logps/rejected": -3542.416015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2157287597656, + "rewards/margins": 0.0, + "rewards/rejected": -351.2157287597656, + "step": 1305 + }, + { + "epoch": 13.74736842105263, + "grad_norm": 1.5220086879708106e-06, + "learning_rate": 0.00017267368421052632, + "logits/chosen": 13.23691177368164, + "logits/rejected": 13.23691177368164, + "logps/chosen": -5176.58203125, + "logps/rejected": -5176.58203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7197875976562, + "rewards/margins": 0.0, + "rewards/rejected": -514.7197875976562, + "step": 1306 + }, + { + "epoch": 13.757894736842106, + "grad_norm": 1.469584958613268e-06, + "learning_rate": 0.00017265263157894737, + "logits/chosen": 13.17170238494873, + "logits/rejected": 13.17170238494873, + "logps/chosen": -3542.32421875, + "logps/rejected": -3542.32421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.20654296875, + "rewards/margins": 0.0, + "rewards/rejected": -351.20654296875, + "step": 1307 + }, + { + "epoch": 13.76842105263158, + "grad_norm": 1.43769204896671e-06, + "learning_rate": 0.00017263157894736842, + "logits/chosen": 13.16417407989502, + "logits/rejected": 13.16417407989502, + "logps/chosen": -3997.89453125, + "logps/rejected": -3997.89453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9621276855469, + "rewards/margins": 0.0, + "rewards/rejected": -396.9621276855469, + "step": 1308 + }, + { + "epoch": 13.778947368421052, + "grad_norm": 1.658162091189297e-06, + "learning_rate": 0.0001726105263157895, + "logits/chosen": 13.219422340393066, + "logits/rejected": 13.219422340393066, + "logps/chosen": -4326.048828125, + "logps/rejected": -4326.048828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4200134277344, + "rewards/margins": 0.0, + "rewards/rejected": -429.4200134277344, + "step": 1309 + }, + { + "epoch": 13.789473684210526, + "grad_norm": 8.727628824090061e-07, + "learning_rate": 0.00017258947368421055, + "logits/chosen": 13.190666198730469, + "logits/rejected": 13.190666198730469, + "logps/chosen": -2967.962890625, + "logps/rejected": -2967.962890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0700988769531, + "rewards/margins": 0.0, + "rewards/rejected": -294.0700988769531, + "step": 1310 + }, + { + "epoch": 13.8, + "grad_norm": 3.942923740396509e-06, + "learning_rate": 0.00017256842105263157, + "logits/chosen": 13.250381469726562, + "logits/rejected": 13.250381469726562, + "logps/chosen": -4880.02392578125, + "logps/rejected": -4880.02392578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1552429199219, + "rewards/margins": 0.0, + "rewards/rejected": -485.1552429199219, + "step": 1311 + }, + { + "epoch": 13.810526315789474, + "grad_norm": 1.4225306586013176e-06, + "learning_rate": 0.00017254736842105264, + "logits/chosen": 13.25527286529541, + "logits/rejected": 13.25527286529541, + "logps/chosen": -4880.912109375, + "logps/rejected": -4880.912109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2440490722656, + "rewards/margins": 0.0, + "rewards/rejected": -485.2440490722656, + "step": 1312 + }, + { + "epoch": 13.821052631578947, + "grad_norm": 2.981441184601863e-06, + "learning_rate": 0.0001725263157894737, + "logits/chosen": 13.194138526916504, + "logits/rejected": 13.194138526916504, + "logps/chosen": -3778.814453125, + "logps/rejected": -3778.814453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.0123291015625, + "rewards/margins": 0.0, + "rewards/rejected": -375.0123291015625, + "step": 1313 + }, + { + "epoch": 13.83157894736842, + "grad_norm": 2.0837317151745083e-06, + "learning_rate": 0.00017250526315789474, + "logits/chosen": 13.191129684448242, + "logits/rejected": 13.191129684448242, + "logps/chosen": -3997.51953125, + "logps/rejected": -3997.51953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9246520996094, + "rewards/margins": 0.0, + "rewards/rejected": -396.9246520996094, + "step": 1314 + }, + { + "epoch": 13.842105263157894, + "grad_norm": 9.167787879960088e-07, + "learning_rate": 0.0001724842105263158, + "logits/chosen": 13.182260513305664, + "logits/rejected": 13.182260513305664, + "logps/chosen": -2673.736328125, + "logps/rejected": -2673.736328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.592041015625, + "rewards/margins": 0.0, + "rewards/rejected": -264.592041015625, + "step": 1315 + }, + { + "epoch": 13.852631578947369, + "grad_norm": 3.7157267342990963e-06, + "learning_rate": 0.00017246315789473687, + "logits/chosen": 13.274850845336914, + "logits/rejected": 13.274850845336914, + "logps/chosen": -5176.7109375, + "logps/rejected": -5176.7109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.732666015625, + "rewards/margins": 0.0, + "rewards/rejected": -514.732666015625, + "step": 1316 + }, + { + "epoch": 13.863157894736842, + "grad_norm": 1.0027770258602686e-06, + "learning_rate": 0.0001724421052631579, + "logits/chosen": 13.207159996032715, + "logits/rejected": 13.207159996032715, + "logps/chosen": -3543.87109375, + "logps/rejected": -3543.87109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3612365722656, + "rewards/margins": 0.0, + "rewards/rejected": -351.3612365722656, + "step": 1317 + }, + { + "epoch": 13.873684210526315, + "grad_norm": 1.4301142527983757e-06, + "learning_rate": 0.00017242105263157894, + "logits/chosen": 13.190265655517578, + "logits/rejected": 13.190265655517578, + "logps/chosen": -3997.947265625, + "logps/rejected": -3997.947265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9674072265625, + "rewards/margins": 0.0, + "rewards/rejected": -396.9674072265625, + "step": 1318 + }, + { + "epoch": 13.884210526315789, + "grad_norm": 2.0466534351726295e-06, + "learning_rate": 0.00017240000000000002, + "logits/chosen": 13.200031280517578, + "logits/rejected": 13.200031280517578, + "logps/chosen": -3758.0693359375, + "logps/rejected": -3758.0693359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8886413574219, + "rewards/margins": 0.0, + "rewards/rejected": -372.8886413574219, + "step": 1319 + }, + { + "epoch": 13.894736842105264, + "grad_norm": 1.5260916370607447e-06, + "learning_rate": 0.00017237894736842107, + "logits/chosen": 13.19677448272705, + "logits/rejected": 13.19677448272705, + "logps/chosen": -3544.154296875, + "logps/rejected": -3544.154296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3895568847656, + "rewards/margins": 0.0, + "rewards/rejected": -351.3895568847656, + "step": 1320 + }, + { + "epoch": 13.905263157894737, + "grad_norm": 1.5502948826906504e-06, + "learning_rate": 0.00017235789473684211, + "logits/chosen": 13.181499481201172, + "logits/rejected": 13.181499481201172, + "logps/chosen": -3998.498046875, + "logps/rejected": -3998.498046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0224914550781, + "rewards/margins": 0.0, + "rewards/rejected": -397.0224914550781, + "step": 1321 + }, + { + "epoch": 13.91578947368421, + "grad_norm": 1.4038795370652224e-06, + "learning_rate": 0.00017233684210526316, + "logits/chosen": 13.24766731262207, + "logits/rejected": 13.24766731262207, + "logps/chosen": -4880.6533203125, + "logps/rejected": -4880.6533203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2181701660156, + "rewards/margins": 0.0, + "rewards/rejected": -485.2181701660156, + "step": 1322 + }, + { + "epoch": 13.926315789473684, + "grad_norm": 9.854934432951268e-07, + "learning_rate": 0.0001723157894736842, + "logits/chosen": 13.16574478149414, + "logits/rejected": 13.16574478149414, + "logps/chosen": -2673.9072265625, + "logps/rejected": -2673.9072265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.609130859375, + "rewards/margins": 0.0, + "rewards/rejected": -264.609130859375, + "step": 1323 + }, + { + "epoch": 13.936842105263159, + "grad_norm": 1.7348602341371588e-06, + "learning_rate": 0.00017229473684210526, + "logits/chosen": 13.190699577331543, + "logits/rejected": 13.190699577331543, + "logps/chosen": -3544.298828125, + "logps/rejected": -3544.298828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4040222167969, + "rewards/margins": 0.0, + "rewards/rejected": -351.4040222167969, + "step": 1324 + }, + { + "epoch": 13.947368421052632, + "grad_norm": 1.4018485217093257e-06, + "learning_rate": 0.0001722736842105263, + "logits/chosen": 13.245927810668945, + "logits/rejected": 13.245927810668945, + "logps/chosen": -4880.5869140625, + "logps/rejected": -4880.5869140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2115173339844, + "rewards/margins": 0.0, + "rewards/rejected": -485.2115173339844, + "step": 1325 + }, + { + "epoch": 13.957894736842105, + "grad_norm": 1.5222434512907057e-06, + "learning_rate": 0.0001722526315789474, + "logits/chosen": 13.194190979003906, + "logits/rejected": 13.194190979003906, + "logps/chosen": -4287.30615234375, + "logps/rejected": -4287.30615234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9332580566406, + "rewards/margins": 0.0, + "rewards/rejected": -425.9332580566406, + "step": 1326 + }, + { + "epoch": 13.968421052631578, + "grad_norm": 2.366171656831284e-06, + "learning_rate": 0.00017223157894736844, + "logits/chosen": 13.230754852294922, + "logits/rejected": 13.230754852294922, + "logps/chosen": -4326.416015625, + "logps/rejected": -4326.416015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4567565917969, + "rewards/margins": 0.0, + "rewards/rejected": -429.4567565917969, + "step": 1327 + }, + { + "epoch": 13.978947368421053, + "grad_norm": 2.2065630673751002e-06, + "learning_rate": 0.0001722105263157895, + "logits/chosen": 13.260734558105469, + "logits/rejected": 13.260734558105469, + "logps/chosen": -5176.2802734375, + "logps/rejected": -5176.2802734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6896362304688, + "rewards/margins": 0.0, + "rewards/rejected": -514.6896362304688, + "step": 1328 + }, + { + "epoch": 13.989473684210527, + "grad_norm": 3.0406204132304993e-06, + "learning_rate": 0.00017218947368421054, + "logits/chosen": 13.262333869934082, + "logits/rejected": 13.262333869934082, + "logps/chosen": -5176.4462890625, + "logps/rejected": -5176.4462890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7062377929688, + "rewards/margins": 0.0, + "rewards/rejected": -514.7062377929688, + "step": 1329 + }, + { + "epoch": 14.0, + "grad_norm": 3.0065550618019188e-06, + "learning_rate": 0.00017216842105263159, + "logits/chosen": 13.265579223632812, + "logits/rejected": 13.265579223632812, + "logps/chosen": -5176.94140625, + "logps/rejected": -5176.94140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7557373046875, + "rewards/margins": 0.0, + "rewards/rejected": -514.7557373046875, + "step": 1330 + }, + { + "epoch": 14.010526315789473, + "grad_norm": 1.303809085584362e-06, + "learning_rate": 0.00017214736842105263, + "logits/chosen": 13.205971717834473, + "logits/rejected": 13.205971717834473, + "logps/chosen": -3758.37109375, + "logps/rejected": -3758.37109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9187927246094, + "rewards/margins": 0.0, + "rewards/rejected": -372.9187927246094, + "step": 1331 + }, + { + "epoch": 14.021052631578947, + "grad_norm": 1.5817903431525338e-06, + "learning_rate": 0.00017212631578947368, + "logits/chosen": 13.20882511138916, + "logits/rejected": 13.20882511138916, + "logps/chosen": -3758.20703125, + "logps/rejected": -3758.20703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.90240478515625, + "rewards/margins": 0.0, + "rewards/rejected": -372.90240478515625, + "step": 1332 + }, + { + "epoch": 14.031578947368422, + "grad_norm": 2.65092535300937e-06, + "learning_rate": 0.00017210526315789476, + "logits/chosen": 13.195255279541016, + "logits/rejected": 13.195255279541016, + "logps/chosen": -3998.2421875, + "logps/rejected": -3998.2421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9969177246094, + "rewards/margins": 0.0, + "rewards/rejected": -396.9969177246094, + "step": 1333 + }, + { + "epoch": 14.042105263157895, + "grad_norm": 1.4598131201637443e-06, + "learning_rate": 0.0001720842105263158, + "logits/chosen": 13.193469047546387, + "logits/rejected": 13.193469047546387, + "logps/chosen": -3998.203125, + "logps/rejected": -3998.203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9930114746094, + "rewards/margins": 0.0, + "rewards/rejected": -396.9930114746094, + "step": 1334 + }, + { + "epoch": 14.052631578947368, + "grad_norm": 2.649177076818887e-06, + "learning_rate": 0.00017206315789473686, + "logits/chosen": 13.252957344055176, + "logits/rejected": 13.252957344055176, + "logps/chosen": -4880.5791015625, + "logps/rejected": -4880.5791015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.21075439453125, + "rewards/margins": 0.0, + "rewards/rejected": -485.21075439453125, + "step": 1335 + }, + { + "epoch": 14.063157894736841, + "grad_norm": 1.0034248134616064e-06, + "learning_rate": 0.00017204210526315788, + "logits/chosen": 13.19357681274414, + "logits/rejected": 13.19357681274414, + "logps/chosen": -2967.359375, + "logps/rejected": -2967.359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0097351074219, + "rewards/margins": 0.0, + "rewards/rejected": -294.0097351074219, + "step": 1336 + }, + { + "epoch": 14.073684210526316, + "grad_norm": 8.169994544005021e-06, + "learning_rate": 0.00017202105263157896, + "logits/chosen": 13.253226280212402, + "logits/rejected": 13.253226280212402, + "logps/chosen": -5177.7568359375, + "logps/rejected": -5177.7568359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.8372802734375, + "rewards/margins": 0.0, + "rewards/rejected": -514.8372802734375, + "step": 1337 + }, + { + "epoch": 14.08421052631579, + "grad_norm": 9.519492891740811e-07, + "learning_rate": 0.000172, + "logits/chosen": 13.159282684326172, + "logits/rejected": 13.159282684326172, + "logps/chosen": -2672.634765625, + "logps/rejected": -2672.634765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4819030761719, + "rewards/margins": 0.0, + "rewards/rejected": -264.4819030761719, + "step": 1338 + }, + { + "epoch": 14.094736842105263, + "grad_norm": 1.65651690622326e-05, + "learning_rate": 0.00017197894736842106, + "logits/chosen": 13.237701416015625, + "logits/rejected": 13.237701416015625, + "logps/chosen": -5176.2880859375, + "logps/rejected": -5176.2880859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6903686523438, + "rewards/margins": 0.0, + "rewards/rejected": -514.6903686523438, + "step": 1339 + }, + { + "epoch": 14.105263157894736, + "grad_norm": 7.876771633164026e-06, + "learning_rate": 0.0001719578947368421, + "logits/chosen": 13.184174537658691, + "logits/rejected": 13.184174537658691, + "logps/chosen": -3774.2265625, + "logps/rejected": -3774.2265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.5535583496094, + "rewards/margins": 0.0, + "rewards/rejected": -374.5535583496094, + "step": 1340 + }, + { + "epoch": 14.115789473684211, + "grad_norm": 1.3585263332061004e-05, + "learning_rate": 0.00017193684210526318, + "logits/chosen": 13.225566864013672, + "logits/rejected": 13.225566864013672, + "logps/chosen": -4323.64453125, + "logps/rejected": -4323.64453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.1795959472656, + "rewards/margins": 0.0, + "rewards/rejected": -429.1795959472656, + "step": 1341 + }, + { + "epoch": 14.126315789473685, + "grad_norm": 1.6314090771629708e-06, + "learning_rate": 0.0001719157894736842, + "logits/chosen": 13.169658660888672, + "logits/rejected": 13.169658660888672, + "logps/chosen": -3776.046875, + "logps/rejected": -3776.046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7355651855469, + "rewards/margins": 0.0, + "rewards/rejected": -374.7355651855469, + "step": 1342 + }, + { + "epoch": 14.136842105263158, + "grad_norm": 2.9886099582654424e-05, + "learning_rate": 0.00017189473684210525, + "logits/chosen": 13.212135314941406, + "logits/rejected": 13.212135314941406, + "logps/chosen": -5173.58642578125, + "logps/rejected": -5173.58642578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4202270507812, + "rewards/margins": 0.0, + "rewards/rejected": -514.4202270507812, + "step": 1343 + }, + { + "epoch": 14.147368421052631, + "grad_norm": 1.2741395948978607e-05, + "learning_rate": 0.00017187368421052633, + "logits/chosen": 13.180377960205078, + "logits/rejected": 13.180377960205078, + "logps/chosen": -3754.025390625, + "logps/rejected": -3754.025390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.4842224121094, + "rewards/margins": 0.0, + "rewards/rejected": -372.4842224121094, + "step": 1344 + }, + { + "epoch": 14.157894736842104, + "grad_norm": 1.555896051286254e-05, + "learning_rate": 0.00017185263157894738, + "logits/chosen": 13.168469429016113, + "logits/rejected": 13.168469429016113, + "logps/chosen": -3533.8203125, + "logps/rejected": -3533.8203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.3561706542969, + "rewards/margins": 0.0, + "rewards/rejected": -350.3561706542969, + "step": 1345 + }, + { + "epoch": 14.16842105263158, + "grad_norm": 5.479887022374896e-06, + "learning_rate": 0.00017183157894736843, + "logits/chosen": 13.175041198730469, + "logits/rejected": 13.175041198730469, + "logps/chosen": -2964.75, + "logps/rejected": -2964.75, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.7488098144531, + "rewards/margins": 0.0, + "rewards/rejected": -293.7488098144531, + "step": 1346 + }, + { + "epoch": 14.178947368421053, + "grad_norm": 2.8144702355348272e-06, + "learning_rate": 0.00017181052631578948, + "logits/chosen": 13.155779838562012, + "logits/rejected": 13.155779838562012, + "logps/chosen": -4287.39453125, + "logps/rejected": -4287.39453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9421081542969, + "rewards/margins": 0.0, + "rewards/rejected": -425.9421081542969, + "step": 1347 + }, + { + "epoch": 14.189473684210526, + "grad_norm": 1.4167173503665254e-05, + "learning_rate": 0.00017178947368421055, + "logits/chosen": 13.124217987060547, + "logits/rejected": 13.124217987060547, + "logps/chosen": -3539.01171875, + "logps/rejected": -3539.01171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.87530517578125, + "rewards/margins": 0.0, + "rewards/rejected": -350.87530517578125, + "step": 1348 + }, + { + "epoch": 14.2, + "grad_norm": 8.512510248692706e-06, + "learning_rate": 0.00017176842105263158, + "logits/chosen": 13.127250671386719, + "logits/rejected": 13.127250671386719, + "logps/chosen": -3995.546875, + "logps/rejected": -3995.546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7273864746094, + "rewards/margins": 0.0, + "rewards/rejected": -396.7273864746094, + "step": 1349 + }, + { + "epoch": 14.210526315789474, + "grad_norm": 1.8842674762709066e-06, + "learning_rate": 0.00017174736842105262, + "logits/chosen": 13.14427375793457, + "logits/rejected": 13.14427375793457, + "logps/chosen": -2672.650390625, + "logps/rejected": -2672.650390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.48345947265625, + "rewards/margins": 0.0, + "rewards/rejected": -264.48345947265625, + "step": 1350 + }, + { + "epoch": 14.210526315789474, + "eval_logits/chosen": 13.205266952514648, + "eval_logits/rejected": 13.205266952514648, + "eval_logps/chosen": -4303.63818359375, + "eval_logps/rejected": -4303.63818359375, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -427.460693359375, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -427.460693359375, + "eval_runtime": 4.2485, + "eval_samples_per_second": 2.354, + "eval_steps_per_second": 2.354, + "step": 1350 + }, + { + "epoch": 14.221052631578948, + "grad_norm": 7.026199000392808e-06, + "learning_rate": 0.0001717263157894737, + "logits/chosen": 13.177156448364258, + "logits/rejected": 13.177156448364258, + "logps/chosen": -2963.359375, + "logps/rejected": -2963.359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.6097412109375, + "rewards/margins": 0.0, + "rewards/rejected": -293.6097412109375, + "step": 1351 + }, + { + "epoch": 14.23157894736842, + "grad_norm": 1.765816705301404e-05, + "learning_rate": 0.00017170526315789475, + "logits/chosen": 13.15739917755127, + "logits/rejected": 13.15739917755127, + "logps/chosen": -3988.13671875, + "logps/rejected": -3988.13671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -395.9863586425781, + "rewards/margins": 0.0, + "rewards/rejected": -395.9863586425781, + "step": 1352 + }, + { + "epoch": 14.242105263157894, + "grad_norm": 7.770182492095046e-06, + "learning_rate": 0.0001716842105263158, + "logits/chosen": 13.227485656738281, + "logits/rejected": 13.227485656738281, + "logps/chosen": -5177.908203125, + "logps/rejected": -5177.908203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.8524169921875, + "rewards/margins": 0.0, + "rewards/rejected": -514.8524169921875, + "step": 1353 + }, + { + "epoch": 14.25263157894737, + "grad_norm": 2.5974733944167383e-05, + "learning_rate": 0.00017166315789473685, + "logits/chosen": 13.068532943725586, + "logits/rejected": 13.068532943725586, + "logps/chosen": -3530.765625, + "logps/rejected": -3530.765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.0506896972656, + "rewards/margins": 0.0, + "rewards/rejected": -350.0506896972656, + "step": 1354 + }, + { + "epoch": 14.263157894736842, + "grad_norm": 2.8610100343939848e-05, + "learning_rate": 0.0001716421052631579, + "logits/chosen": 13.18753719329834, + "logits/rejected": 13.18753719329834, + "logps/chosen": -5172.0068359375, + "logps/rejected": -5172.0068359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.2622680664062, + "rewards/margins": 0.0, + "rewards/rejected": -514.2622680664062, + "step": 1355 + }, + { + "epoch": 14.273684210526316, + "grad_norm": 1.2156935554230586e-05, + "learning_rate": 0.00017162105263157895, + "logits/chosen": 13.169296264648438, + "logits/rejected": 13.169296264648438, + "logps/chosen": -3538.7587890625, + "logps/rejected": -3538.7587890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.8500061035156, + "rewards/margins": 0.0, + "rewards/rejected": -350.8500061035156, + "step": 1356 + }, + { + "epoch": 14.284210526315789, + "grad_norm": 3.180498242727481e-05, + "learning_rate": 0.0001716, + "logits/chosen": 13.19412899017334, + "logits/rejected": 13.19412899017334, + "logps/chosen": -4302.65234375, + "logps/rejected": -4302.65234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -427.08038330078125, + "rewards/margins": 0.0, + "rewards/rejected": -427.08038330078125, + "step": 1357 + }, + { + "epoch": 14.294736842105262, + "grad_norm": 1.5012937183200847e-05, + "learning_rate": 0.00017157894736842107, + "logits/chosen": 13.164214134216309, + "logits/rejected": 13.164214134216309, + "logps/chosen": -3533.70703125, + "logps/rejected": -3533.70703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.3448181152344, + "rewards/margins": 0.0, + "rewards/rejected": -350.3448181152344, + "step": 1358 + }, + { + "epoch": 14.305263157894737, + "grad_norm": 5.59252612220007e-06, + "learning_rate": 0.00017155789473684212, + "logits/chosen": 13.21369457244873, + "logits/rejected": 13.21369457244873, + "logps/chosen": -4877.013671875, + "logps/rejected": -4877.013671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.8542175292969, + "rewards/margins": 0.0, + "rewards/rejected": -484.8542175292969, + "step": 1359 + }, + { + "epoch": 14.31578947368421, + "grad_norm": 1.5276284102583304e-05, + "learning_rate": 0.00017153684210526317, + "logits/chosen": 13.001856803894043, + "logits/rejected": 13.001856803894043, + "logps/chosen": -2658.439453125, + "logps/rejected": -2658.439453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -263.0623474121094, + "rewards/margins": 0.0, + "rewards/rejected": -263.0623474121094, + "step": 1360 + }, + { + "epoch": 14.326315789473684, + "grad_norm": 4.242912109475583e-05, + "learning_rate": 0.00017151578947368422, + "logits/chosen": 12.979222297668457, + "logits/rejected": 12.979222297668457, + "logps/chosen": -3965.40625, + "logps/rejected": -3965.40625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -393.71331787109375, + "rewards/margins": 0.0, + "rewards/rejected": -393.71331787109375, + "step": 1361 + }, + { + "epoch": 14.336842105263157, + "grad_norm": 1.6982289707812015e-06, + "learning_rate": 0.00017149473684210527, + "logits/chosen": 13.156967163085938, + "logits/rejected": 13.156967163085938, + "logps/chosen": -3776.45703125, + "logps/rejected": -3776.45703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7765808105469, + "rewards/margins": 0.0, + "rewards/rejected": -374.7765808105469, + "step": 1362 + }, + { + "epoch": 14.347368421052632, + "grad_norm": 1.9396415154915303e-05, + "learning_rate": 0.00017147368421052632, + "logits/chosen": 13.17154312133789, + "logits/rejected": 13.17154312133789, + "logps/chosen": -3979.978515625, + "logps/rejected": -3979.978515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -395.1705322265625, + "rewards/margins": 0.0, + "rewards/rejected": -395.1705322265625, + "step": 1363 + }, + { + "epoch": 14.357894736842105, + "grad_norm": 1.6068575860117562e-05, + "learning_rate": 0.00017145263157894737, + "logits/chosen": 13.17044734954834, + "logits/rejected": 13.17044734954834, + "logps/chosen": -3524.6103515625, + "logps/rejected": -3524.6103515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -349.4351501464844, + "rewards/margins": 0.0, + "rewards/rejected": -349.4351501464844, + "step": 1364 + }, + { + "epoch": 14.368421052631579, + "grad_norm": 1.9298457118566148e-05, + "learning_rate": 0.00017143157894736845, + "logits/chosen": 13.162466049194336, + "logits/rejected": 13.162466049194336, + "logps/chosen": -3977.45703125, + "logps/rejected": -3977.45703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -394.91839599609375, + "rewards/margins": 0.0, + "rewards/rejected": -394.91839599609375, + "step": 1365 + }, + { + "epoch": 14.378947368421052, + "grad_norm": 1.4766707863600459e-05, + "learning_rate": 0.0001714105263157895, + "logits/chosen": 13.214545249938965, + "logits/rejected": 13.214545249938965, + "logps/chosen": -4871.7392578125, + "logps/rejected": -4871.7392578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.3267517089844, + "rewards/margins": 0.0, + "rewards/rejected": -484.3267517089844, + "step": 1366 + }, + { + "epoch": 14.389473684210527, + "grad_norm": 1.4433329852181487e-05, + "learning_rate": 0.00017138947368421054, + "logits/chosen": 13.064927101135254, + "logits/rejected": 13.064927101135254, + "logps/chosen": -3990.587890625, + "logps/rejected": -3990.587890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.2314758300781, + "rewards/margins": 0.0, + "rewards/rejected": -396.2314758300781, + "step": 1367 + }, + { + "epoch": 14.4, + "grad_norm": 2.533439692342654e-05, + "learning_rate": 0.00017136842105263157, + "logits/chosen": 12.99682331085205, + "logits/rejected": 12.99682331085205, + "logps/chosen": -3522.75390625, + "logps/rejected": -3522.75390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -349.24951171875, + "rewards/margins": 0.0, + "rewards/rejected": -349.24951171875, + "step": 1368 + }, + { + "epoch": 14.410526315789474, + "grad_norm": 3.089273741352372e-05, + "learning_rate": 0.00017134736842105264, + "logits/chosen": 13.113932609558105, + "logits/rejected": 13.113932609558105, + "logps/chosen": -5161.814453125, + "logps/rejected": -5161.814453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -513.2430419921875, + "rewards/margins": 0.0, + "rewards/rejected": -513.2430419921875, + "step": 1369 + }, + { + "epoch": 14.421052631578947, + "grad_norm": 9.777106697583804e-07, + "learning_rate": 0.0001713263157894737, + "logits/chosen": 13.1111478805542, + "logits/rejected": 13.1111478805542, + "logps/chosen": -2672.65234375, + "logps/rejected": -2672.65234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.483642578125, + "rewards/margins": 0.0, + "rewards/rejected": -264.483642578125, + "step": 1370 + }, + { + "epoch": 14.431578947368422, + "grad_norm": 6.444215614465065e-06, + "learning_rate": 0.00017130526315789474, + "logits/chosen": 13.156900405883789, + "logits/rejected": 13.156900405883789, + "logps/chosen": -2958.296875, + "logps/rejected": -2958.296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.1034851074219, + "rewards/margins": 0.0, + "rewards/rejected": -293.1034851074219, + "step": 1371 + }, + { + "epoch": 14.442105263157895, + "grad_norm": 8.70741769176675e-06, + "learning_rate": 0.0001712842105263158, + "logits/chosen": 13.146058082580566, + "logits/rejected": 13.146058082580566, + "logps/chosen": -2946.986328125, + "logps/rejected": -2946.986328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -291.9724426269531, + "rewards/margins": 0.0, + "rewards/rejected": -291.9724426269531, + "step": 1372 + }, + { + "epoch": 14.452631578947368, + "grad_norm": 1.7357309843646362e-05, + "learning_rate": 0.00017126315789473687, + "logits/chosen": 13.128440856933594, + "logits/rejected": 13.128440856933594, + "logps/chosen": -3744.365234375, + "logps/rejected": -3744.365234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -371.5674133300781, + "rewards/margins": 0.0, + "rewards/rejected": -371.5674133300781, + "step": 1373 + }, + { + "epoch": 14.463157894736842, + "grad_norm": 2.121261968568433e-05, + "learning_rate": 0.0001712421052631579, + "logits/chosen": 13.164679527282715, + "logits/rejected": 13.164679527282715, + "logps/chosen": -4298.451171875, + "logps/rejected": -4298.451171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.6602478027344, + "rewards/margins": 0.0, + "rewards/rejected": -426.6602478027344, + "step": 1374 + }, + { + "epoch": 14.473684210526315, + "grad_norm": 1.280099149880698e-05, + "learning_rate": 0.00017122105263157894, + "logits/chosen": 13.112738609313965, + "logits/rejected": 13.112738609313965, + "logps/chosen": -3988.1796875, + "logps/rejected": -3988.1796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -395.99066162109375, + "rewards/margins": 0.0, + "rewards/rejected": -395.99066162109375, + "step": 1375 + }, + { + "epoch": 14.48421052631579, + "grad_norm": 1.5633984276064439e-06, + "learning_rate": 0.00017120000000000001, + "logits/chosen": 13.05401611328125, + "logits/rejected": 13.05401611328125, + "logps/chosen": -3998.109375, + "logps/rejected": -3998.109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9836120605469, + "rewards/margins": 0.0, + "rewards/rejected": -396.9836120605469, + "step": 1376 + }, + { + "epoch": 14.494736842105263, + "grad_norm": 2.510229023755528e-05, + "learning_rate": 0.00017117894736842106, + "logits/chosen": 13.020404815673828, + "logits/rejected": 13.020404815673828, + "logps/chosen": -4862.0576171875, + "logps/rejected": -4862.0576171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -483.3586120605469, + "rewards/margins": 0.0, + "rewards/rejected": -483.3586120605469, + "step": 1377 + }, + { + "epoch": 14.505263157894737, + "grad_norm": 3.04246241285e-05, + "learning_rate": 0.0001711578947368421, + "logits/chosen": 12.989005088806152, + "logits/rejected": 12.989005088806152, + "logps/chosen": -4857.396484375, + "logps/rejected": -4857.396484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -482.8924865722656, + "rewards/margins": 0.0, + "rewards/rejected": -482.8924865722656, + "step": 1378 + }, + { + "epoch": 14.51578947368421, + "grad_norm": 4.315393653087085e-06, + "learning_rate": 0.00017113684210526316, + "logits/chosen": 12.965651512145996, + "logits/rejected": 12.965651512145996, + "logps/chosen": -2669.421875, + "logps/rejected": -2669.421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.1606140136719, + "rewards/margins": 0.0, + "rewards/rejected": -264.1606140136719, + "step": 1379 + }, + { + "epoch": 14.526315789473685, + "grad_norm": 1.223992171617283e-06, + "learning_rate": 0.00017111578947368424, + "logits/chosen": 13.045852661132812, + "logits/rejected": 13.045852661132812, + "logps/chosen": -3542.16796875, + "logps/rejected": -3542.16796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.19091796875, + "rewards/margins": 0.0, + "rewards/rejected": -351.19091796875, + "step": 1380 + }, + { + "epoch": 14.536842105263158, + "grad_norm": 1.3810269592795521e-05, + "learning_rate": 0.00017109473684210526, + "logits/chosen": 13.118818283081055, + "logits/rejected": 13.118818283081055, + "logps/chosen": -4869.65185546875, + "logps/rejected": -4869.65185546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.1180114746094, + "rewards/margins": 0.0, + "rewards/rejected": -484.1180114746094, + "step": 1381 + }, + { + "epoch": 14.547368421052632, + "grad_norm": 6.774283519916935e-06, + "learning_rate": 0.0001710736842105263, + "logits/chosen": 13.078365325927734, + "logits/rejected": 13.078365325927734, + "logps/chosen": -3537.509765625, + "logps/rejected": -3537.509765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -350.72509765625, + "rewards/margins": 0.0, + "rewards/rejected": -350.72509765625, + "step": 1382 + }, + { + "epoch": 14.557894736842105, + "grad_norm": 7.00138161846553e-06, + "learning_rate": 0.00017105263157894739, + "logits/chosen": 13.082615852355957, + "logits/rejected": 13.082615852355957, + "logps/chosen": -3750.982421875, + "logps/rejected": -3750.982421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.179931640625, + "rewards/margins": 0.0, + "rewards/rejected": -372.179931640625, + "step": 1383 + }, + { + "epoch": 14.568421052631578, + "grad_norm": 1.6010828403523192e-05, + "learning_rate": 0.00017103157894736844, + "logits/chosen": 13.148509979248047, + "logits/rejected": 13.148509979248047, + "logps/chosen": -5164.42578125, + "logps/rejected": -5164.42578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -513.504150390625, + "rewards/margins": 0.0, + "rewards/rejected": -513.504150390625, + "step": 1384 + }, + { + "epoch": 14.578947368421053, + "grad_norm": 1.1382093134670868e-06, + "learning_rate": 0.00017101052631578948, + "logits/chosen": 13.071215629577637, + "logits/rejected": 13.071215629577637, + "logps/chosen": -3542.55078125, + "logps/rejected": -3542.55078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2292175292969, + "rewards/margins": 0.0, + "rewards/rejected": -351.2292175292969, + "step": 1385 + }, + { + "epoch": 14.589473684210526, + "grad_norm": 7.105096756276907e-06, + "learning_rate": 0.00017098947368421053, + "logits/chosen": 13.12106704711914, + "logits/rejected": 13.12106704711914, + "logps/chosen": -5171.47705078125, + "logps/rejected": -5171.47705078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.2092895507812, + "rewards/margins": 0.0, + "rewards/rejected": -514.2092895507812, + "step": 1386 + }, + { + "epoch": 14.6, + "grad_norm": 1.8247093976242468e-05, + "learning_rate": 0.00017096842105263158, + "logits/chosen": 13.034560203552246, + "logits/rejected": 13.034560203552246, + "logps/chosen": -4314.28125, + "logps/rejected": -4314.28125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -428.2432556152344, + "rewards/margins": 0.0, + "rewards/rejected": -428.2432556152344, + "step": 1387 + }, + { + "epoch": 14.610526315789473, + "grad_norm": 1.1161985639773775e-05, + "learning_rate": 0.00017094736842105263, + "logits/chosen": 13.062606811523438, + "logits/rejected": 13.062606811523438, + "logps/chosen": -4280.14208984375, + "logps/rejected": -4280.14208984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.21685791015625, + "rewards/margins": 0.0, + "rewards/rejected": -425.21685791015625, + "step": 1388 + }, + { + "epoch": 14.621052631578948, + "grad_norm": 2.6693758172768867e-06, + "learning_rate": 0.00017092631578947368, + "logits/chosen": 13.179910659790039, + "logits/rejected": 13.179910659790039, + "logps/chosen": -4878.9443359375, + "logps/rejected": -4878.9443359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0472717285156, + "rewards/margins": 0.0, + "rewards/rejected": -485.0472717285156, + "step": 1389 + }, + { + "epoch": 14.631578947368421, + "grad_norm": 8.390725838580693e-07, + "learning_rate": 0.00017090526315789476, + "logits/chosen": 13.135778427124023, + "logits/rejected": 13.135778427124023, + "logps/chosen": -2673.55078125, + "logps/rejected": -2673.55078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.573486328125, + "rewards/margins": 0.0, + "rewards/rejected": -264.573486328125, + "step": 1390 + }, + { + "epoch": 14.642105263157895, + "grad_norm": 3.430728838793584e-06, + "learning_rate": 0.0001708842105263158, + "logits/chosen": 13.19646167755127, + "logits/rejected": 13.19646167755127, + "logps/chosen": -3542.49609375, + "logps/rejected": -3542.49609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2237243652344, + "rewards/margins": 0.0, + "rewards/rejected": -351.2237243652344, + "step": 1391 + }, + { + "epoch": 14.652631578947368, + "grad_norm": 1.7772032151697204e-05, + "learning_rate": 0.00017086315789473686, + "logits/chosen": 13.278853416442871, + "logits/rejected": 13.278853416442871, + "logps/chosen": -5165.3515625, + "logps/rejected": -5165.3515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -513.5967407226562, + "rewards/margins": 0.0, + "rewards/rejected": -513.5967407226562, + "step": 1392 + }, + { + "epoch": 14.663157894736843, + "grad_norm": 2.0008653791592224e-06, + "learning_rate": 0.00017084210526315788, + "logits/chosen": 13.20356273651123, + "logits/rejected": 13.20356273651123, + "logps/chosen": -2672.498046875, + "logps/rejected": -2672.498046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4682312011719, + "rewards/margins": 0.0, + "rewards/rejected": -264.4682312011719, + "step": 1393 + }, + { + "epoch": 14.673684210526316, + "grad_norm": 1.9669969333335757e-06, + "learning_rate": 0.00017082105263157896, + "logits/chosen": 13.22119140625, + "logits/rejected": 13.22119140625, + "logps/chosen": -3777.6875, + "logps/rejected": -3777.6875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8996276855469, + "rewards/margins": 0.0, + "rewards/rejected": -374.8996276855469, + "step": 1394 + }, + { + "epoch": 14.68421052631579, + "grad_norm": 1.116115413424268e-06, + "learning_rate": 0.0001708, + "logits/chosen": 13.217914581298828, + "logits/rejected": 13.217914581298828, + "logps/chosen": -3757.6015625, + "logps/rejected": -3757.6015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.84185791015625, + "rewards/margins": 0.0, + "rewards/rejected": -372.84185791015625, + "step": 1395 + }, + { + "epoch": 14.694736842105263, + "grad_norm": 2.5807366910157725e-06, + "learning_rate": 0.00017077894736842105, + "logits/chosen": 13.215991020202637, + "logits/rejected": 13.215991020202637, + "logps/chosen": -3544.076171875, + "logps/rejected": -3544.076171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3817443847656, + "rewards/margins": 0.0, + "rewards/rejected": -351.3817443847656, + "step": 1396 + }, + { + "epoch": 14.705263157894738, + "grad_norm": 5.147968295204919e-06, + "learning_rate": 0.00017075789473684213, + "logits/chosen": 13.202549934387207, + "logits/rejected": 13.202549934387207, + "logps/chosen": -3776.275390625, + "logps/rejected": -3776.275390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7584228515625, + "rewards/margins": 0.0, + "rewards/rejected": -374.7584228515625, + "step": 1397 + }, + { + "epoch": 14.715789473684211, + "grad_norm": 2.7654589302983368e-06, + "learning_rate": 0.00017073684210526318, + "logits/chosen": 13.183501243591309, + "logits/rejected": 13.183501243591309, + "logps/chosen": -2672.6220703125, + "logps/rejected": -2672.6220703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4806213378906, + "rewards/margins": 0.0, + "rewards/rejected": -264.4806213378906, + "step": 1398 + }, + { + "epoch": 14.726315789473684, + "grad_norm": 3.7479319416888757e-06, + "learning_rate": 0.00017071578947368423, + "logits/chosen": 13.294772148132324, + "logits/rejected": 13.294772148132324, + "logps/chosen": -5176.44091796875, + "logps/rejected": -5176.44091796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7056884765625, + "rewards/margins": 0.0, + "rewards/rejected": -514.7056884765625, + "step": 1399 + }, + { + "epoch": 14.736842105263158, + "grad_norm": 2.0623833734134678e-06, + "learning_rate": 0.00017069473684210525, + "logits/chosen": 13.232860565185547, + "logits/rejected": 13.232860565185547, + "logps/chosen": -2967.2158203125, + "logps/rejected": -2967.2158203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9953918457031, + "rewards/margins": 0.0, + "rewards/rejected": -293.9953918457031, + "step": 1400 + }, + { + "epoch": 14.736842105263158, + "eval_logits/chosen": 13.278051376342773, + "eval_logits/rejected": 13.278051376342773, + "eval_logps/chosen": -4311.49755859375, + "eval_logps/rejected": -4311.49755859375, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.24664306640625, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.24664306640625, + "eval_runtime": 4.3236, + "eval_samples_per_second": 2.313, + "eval_steps_per_second": 2.313, + "step": 1400 + }, + { + "epoch": 14.74736842105263, + "grad_norm": 2.655626303749159e-06, + "learning_rate": 0.00017067368421052633, + "logits/chosen": 13.240413665771484, + "logits/rejected": 13.240413665771484, + "logps/chosen": -3778.259765625, + "logps/rejected": -3778.259765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9568786621094, + "rewards/margins": 0.0, + "rewards/rejected": -374.9568786621094, + "step": 1401 + }, + { + "epoch": 14.757894736842106, + "grad_norm": 1.0872782922888291e-06, + "learning_rate": 0.00017065263157894738, + "logits/chosen": 13.25534439086914, + "logits/rejected": 13.25534439086914, + "logps/chosen": -3758.271484375, + "logps/rejected": -3758.271484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9088439941406, + "rewards/margins": 0.0, + "rewards/rejected": -372.9088439941406, + "step": 1402 + }, + { + "epoch": 14.76842105263158, + "grad_norm": 4.671528586186469e-06, + "learning_rate": 0.00017063157894736843, + "logits/chosen": 13.32196044921875, + "logits/rejected": 13.32196044921875, + "logps/chosen": -4878.6728515625, + "logps/rejected": -4878.6728515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0201110839844, + "rewards/margins": 0.0, + "rewards/rejected": -485.0201110839844, + "step": 1403 + }, + { + "epoch": 14.778947368421052, + "grad_norm": 2.7528462851478253e-06, + "learning_rate": 0.00017061052631578948, + "logits/chosen": 13.2721529006958, + "logits/rejected": 13.2721529006958, + "logps/chosen": -4287.81298828125, + "logps/rejected": -4287.81298828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.98394775390625, + "rewards/margins": 0.0, + "rewards/rejected": -425.98394775390625, + "step": 1404 + }, + { + "epoch": 14.789473684210526, + "grad_norm": 2.0953516468580347e-06, + "learning_rate": 0.00017058947368421055, + "logits/chosen": 13.246936798095703, + "logits/rejected": 13.246936798095703, + "logps/chosen": -3995.33203125, + "logps/rejected": -3995.33203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7059020996094, + "rewards/margins": 0.0, + "rewards/rejected": -396.7059020996094, + "step": 1405 + }, + { + "epoch": 14.8, + "grad_norm": 4.696082669397583e-06, + "learning_rate": 0.00017056842105263157, + "logits/chosen": 13.311169624328613, + "logits/rejected": 13.311169624328613, + "logps/chosen": -5176.23046875, + "logps/rejected": -5176.23046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6846313476562, + "rewards/margins": 0.0, + "rewards/rejected": -514.6846313476562, + "step": 1406 + }, + { + "epoch": 14.810526315789474, + "grad_norm": 1.7624211068323348e-06, + "learning_rate": 0.00017054736842105262, + "logits/chosen": 13.294036865234375, + "logits/rejected": 13.294036865234375, + "logps/chosen": -5176.8974609375, + "logps/rejected": -5176.8974609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7513427734375, + "rewards/margins": 0.0, + "rewards/rejected": -514.7513427734375, + "step": 1407 + }, + { + "epoch": 14.821052631578947, + "grad_norm": 2.4072714950307272e-06, + "learning_rate": 0.0001705263157894737, + "logits/chosen": 13.267194747924805, + "logits/rejected": 13.267194747924805, + "logps/chosen": -4879.583984375, + "logps/rejected": -4879.583984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1112365722656, + "rewards/margins": 0.0, + "rewards/rejected": -485.1112365722656, + "step": 1408 + }, + { + "epoch": 14.83157894736842, + "grad_norm": 4.572812031256035e-06, + "learning_rate": 0.00017050526315789475, + "logits/chosen": 13.201088905334473, + "logits/rejected": 13.201088905334473, + "logps/chosen": -4288.3095703125, + "logps/rejected": -4288.3095703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0335998535156, + "rewards/margins": 0.0, + "rewards/rejected": -426.0335998535156, + "step": 1409 + }, + { + "epoch": 14.842105263157894, + "grad_norm": 4.520805305219255e-06, + "learning_rate": 0.0001704842105263158, + "logits/chosen": 13.195809364318848, + "logits/rejected": 13.195809364318848, + "logps/chosen": -4288.4296875, + "logps/rejected": -4288.4296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0456237792969, + "rewards/margins": 0.0, + "rewards/rejected": -426.0456237792969, + "step": 1410 + }, + { + "epoch": 14.852631578947369, + "grad_norm": 1.9461765532469144e-06, + "learning_rate": 0.00017046315789473685, + "logits/chosen": 13.164036750793457, + "logits/rejected": 13.164036750793457, + "logps/chosen": -2673.326171875, + "logps/rejected": -2673.326171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.551025390625, + "rewards/margins": 0.0, + "rewards/rejected": -264.551025390625, + "step": 1411 + }, + { + "epoch": 14.863157894736842, + "grad_norm": 1.728772417664004e-06, + "learning_rate": 0.0001704421052631579, + "logits/chosen": 13.183733940124512, + "logits/rejected": 13.183733940124512, + "logps/chosen": -3995.220703125, + "logps/rejected": -3995.220703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.69476318359375, + "rewards/margins": 0.0, + "rewards/rejected": -396.69476318359375, + "step": 1412 + }, + { + "epoch": 14.873684210526315, + "grad_norm": 1.232051545230206e-06, + "learning_rate": 0.00017042105263157895, + "logits/chosen": 13.204034805297852, + "logits/rejected": 13.204034805297852, + "logps/chosen": -4290.5673828125, + "logps/rejected": -4290.5673828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.2593688964844, + "rewards/margins": 0.0, + "rewards/rejected": -426.2593688964844, + "step": 1413 + }, + { + "epoch": 14.884210526315789, + "grad_norm": 1.7505884670754313e-06, + "learning_rate": 0.0001704, + "logits/chosen": 13.182990074157715, + "logits/rejected": 13.182990074157715, + "logps/chosen": -3995.57421875, + "logps/rejected": -3995.57421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7301025390625, + "rewards/margins": 0.0, + "rewards/rejected": -396.7301025390625, + "step": 1414 + }, + { + "epoch": 14.894736842105264, + "grad_norm": 1.1477078487587278e-06, + "learning_rate": 0.00017037894736842107, + "logits/chosen": 13.187840461730957, + "logits/rejected": 13.187840461730957, + "logps/chosen": -3543.3994140625, + "logps/rejected": -3543.3994140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3140563964844, + "rewards/margins": 0.0, + "rewards/rejected": -351.3140563964844, + "step": 1415 + }, + { + "epoch": 14.905263157894737, + "grad_norm": 5.2062227950955275e-06, + "learning_rate": 0.00017035789473684212, + "logits/chosen": 13.2350435256958, + "logits/rejected": 13.2350435256958, + "logps/chosen": -4879.0400390625, + "logps/rejected": -4879.0400390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0568542480469, + "rewards/margins": 0.0, + "rewards/rejected": -485.0568542480469, + "step": 1416 + }, + { + "epoch": 14.91578947368421, + "grad_norm": 1.4766152389711351e-06, + "learning_rate": 0.00017033684210526317, + "logits/chosen": 13.200243949890137, + "logits/rejected": 13.200243949890137, + "logps/chosen": -4326.447265625, + "logps/rejected": -4326.447265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4598693847656, + "rewards/margins": 0.0, + "rewards/rejected": -429.4598693847656, + "step": 1417 + }, + { + "epoch": 14.926315789473684, + "grad_norm": 3.0300182061182568e-06, + "learning_rate": 0.00017031578947368422, + "logits/chosen": 13.221475601196289, + "logits/rejected": 13.221475601196289, + "logps/chosen": -4879.955078125, + "logps/rejected": -4879.955078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1483459472656, + "rewards/margins": 0.0, + "rewards/rejected": -485.1483459472656, + "step": 1418 + }, + { + "epoch": 14.936842105263159, + "grad_norm": 9.390932405040076e-07, + "learning_rate": 0.00017029473684210527, + "logits/chosen": 13.138678550720215, + "logits/rejected": 13.138678550720215, + "logps/chosen": -2674.5126953125, + "logps/rejected": -2674.5126953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.669677734375, + "rewards/margins": 0.0, + "rewards/rejected": -264.669677734375, + "step": 1419 + }, + { + "epoch": 14.947368421052632, + "grad_norm": 1.936387889145408e-06, + "learning_rate": 0.00017027368421052632, + "logits/chosen": 13.155964851379395, + "logits/rejected": 13.155964851379395, + "logps/chosen": -3542.1923828125, + "logps/rejected": -3542.1923828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.193359375, + "rewards/margins": 0.0, + "rewards/rejected": -351.193359375, + "step": 1420 + }, + { + "epoch": 14.957894736842105, + "grad_norm": 1.9245119347033324e-06, + "learning_rate": 0.00017025263157894737, + "logits/chosen": 13.207898139953613, + "logits/rejected": 13.207898139953613, + "logps/chosen": -4880.283203125, + "logps/rejected": -4880.283203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.18115234375, + "rewards/margins": 0.0, + "rewards/rejected": -485.18115234375, + "step": 1421 + }, + { + "epoch": 14.968421052631578, + "grad_norm": 2.437748662487138e-06, + "learning_rate": 0.00017023157894736844, + "logits/chosen": 13.208243370056152, + "logits/rejected": 13.208243370056152, + "logps/chosen": -4880.294921875, + "logps/rejected": -4880.294921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1823425292969, + "rewards/margins": 0.0, + "rewards/rejected": -485.1823425292969, + "step": 1422 + }, + { + "epoch": 14.978947368421053, + "grad_norm": 2.550890712882392e-06, + "learning_rate": 0.0001702105263157895, + "logits/chosen": 13.211731910705566, + "logits/rejected": 13.211731910705566, + "logps/chosen": -4880.76220703125, + "logps/rejected": -4880.76220703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.22906494140625, + "rewards/margins": 0.0, + "rewards/rejected": -485.22906494140625, + "step": 1423 + }, + { + "epoch": 14.989473684210527, + "grad_norm": 2.1681428279407555e-06, + "learning_rate": 0.00017018947368421054, + "logits/chosen": 13.151742935180664, + "logits/rejected": 13.151742935180664, + "logps/chosen": -3997.357421875, + "logps/rejected": -3997.357421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9084167480469, + "rewards/margins": 0.0, + "rewards/rejected": -396.9084167480469, + "step": 1424 + }, + { + "epoch": 15.0, + "grad_norm": 1.658588416830753e-06, + "learning_rate": 0.00017016842105263156, + "logits/chosen": 13.15597915649414, + "logits/rejected": 13.15597915649414, + "logps/chosen": -3997.861328125, + "logps/rejected": -3997.861328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9588317871094, + "rewards/margins": 0.0, + "rewards/rejected": -396.9588317871094, + "step": 1425 + }, + { + "epoch": 15.010526315789473, + "grad_norm": 1.2933398920722539e-06, + "learning_rate": 0.00017014736842105264, + "logits/chosen": 13.156377792358398, + "logits/rejected": 13.156377792358398, + "logps/chosen": -3998.240234375, + "logps/rejected": -3998.240234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9967041015625, + "rewards/margins": 0.0, + "rewards/rejected": -396.9967041015625, + "step": 1426 + }, + { + "epoch": 15.021052631578947, + "grad_norm": 9.10378957996727e-07, + "learning_rate": 0.0001701263157894737, + "logits/chosen": 13.14017391204834, + "logits/rejected": 13.14017391204834, + "logps/chosen": -2673.80859375, + "logps/rejected": -2673.80859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5992736816406, + "rewards/margins": 0.0, + "rewards/rejected": -264.5992736816406, + "step": 1427 + }, + { + "epoch": 15.031578947368422, + "grad_norm": 8.363235224351229e-07, + "learning_rate": 0.00017010526315789474, + "logits/chosen": 13.135923385620117, + "logits/rejected": 13.135923385620117, + "logps/chosen": -2673.908203125, + "logps/rejected": -2673.908203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6092224121094, + "rewards/margins": 0.0, + "rewards/rejected": -264.6092224121094, + "step": 1428 + }, + { + "epoch": 15.042105263157895, + "grad_norm": 1.8444816305418499e-06, + "learning_rate": 0.0001700842105263158, + "logits/chosen": 13.142634391784668, + "logits/rejected": 13.142634391784668, + "logps/chosen": -3999.39453125, + "logps/rejected": -3999.39453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1121520996094, + "rewards/margins": 0.0, + "rewards/rejected": -397.1121520996094, + "step": 1429 + }, + { + "epoch": 15.052631578947368, + "grad_norm": 8.300196441268781e-07, + "learning_rate": 0.00017006315789473686, + "logits/chosen": 13.121553421020508, + "logits/rejected": 13.121553421020508, + "logps/chosen": -2674.248046875, + "logps/rejected": -2674.248046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6432189941406, + "rewards/margins": 0.0, + "rewards/rejected": -264.6432189941406, + "step": 1430 + }, + { + "epoch": 15.063157894736841, + "grad_norm": 3.89398519473616e-06, + "learning_rate": 0.00017004210526315791, + "logits/chosen": 13.18561840057373, + "logits/rejected": 13.18561840057373, + "logps/chosen": -4882.1416015625, + "logps/rejected": -4882.1416015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.36700439453125, + "rewards/margins": 0.0, + "rewards/rejected": -485.36700439453125, + "step": 1431 + }, + { + "epoch": 15.073684210526316, + "grad_norm": 9.39815095080121e-07, + "learning_rate": 0.00017002105263157894, + "logits/chosen": 13.12387466430664, + "logits/rejected": 13.12387466430664, + "logps/chosen": -3541.009765625, + "logps/rejected": -3541.009765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.0751037597656, + "rewards/margins": 0.0, + "rewards/rejected": -351.0751037597656, + "step": 1432 + }, + { + "epoch": 15.08421052631579, + "grad_norm": 1.432331600881298e-06, + "learning_rate": 0.00017, + "logits/chosen": 13.124300003051758, + "logits/rejected": 13.124300003051758, + "logps/chosen": -4290.08984375, + "logps/rejected": -4290.08984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.2116394042969, + "rewards/margins": 0.0, + "rewards/rejected": -426.2116394042969, + "step": 1433 + }, + { + "epoch": 15.094736842105263, + "grad_norm": 1.4277686659625033e-06, + "learning_rate": 0.00016997894736842106, + "logits/chosen": 13.105451583862305, + "logits/rejected": 13.105451583862305, + "logps/chosen": -3757.8203125, + "logps/rejected": -3757.8203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8637390136719, + "rewards/margins": 0.0, + "rewards/rejected": -372.8637390136719, + "step": 1434 + }, + { + "epoch": 15.105263157894736, + "grad_norm": 1.6716501249902649e-06, + "learning_rate": 0.0001699578947368421, + "logits/chosen": 13.164885520935059, + "logits/rejected": 13.164885520935059, + "logps/chosen": -5173.11865234375, + "logps/rejected": -5173.11865234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3734741210938, + "rewards/margins": 0.0, + "rewards/rejected": -514.3734741210938, + "step": 1435 + }, + { + "epoch": 15.115789473684211, + "grad_norm": 1.701274868537439e-06, + "learning_rate": 0.00016993684210526316, + "logits/chosen": 13.162287712097168, + "logits/rejected": 13.162287712097168, + "logps/chosen": -5173.2021484375, + "logps/rejected": -5173.2021484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3817749023438, + "rewards/margins": 0.0, + "rewards/rejected": -514.3817749023438, + "step": 1436 + }, + { + "epoch": 15.126315789473685, + "grad_norm": 2.2852630081615644e-06, + "learning_rate": 0.00016991578947368424, + "logits/chosen": 13.1080904006958, + "logits/rejected": 13.1080904006958, + "logps/chosen": -4289.6025390625, + "logps/rejected": -4289.6025390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.16290283203125, + "rewards/margins": 0.0, + "rewards/rejected": -426.16290283203125, + "step": 1437 + }, + { + "epoch": 15.136842105263158, + "grad_norm": 2.016254939007922e-06, + "learning_rate": 0.00016989473684210526, + "logits/chosen": 13.092247009277344, + "logits/rejected": 13.092247009277344, + "logps/chosen": -4000.6171875, + "logps/rejected": -4000.6171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.2344055175781, + "rewards/margins": 0.0, + "rewards/rejected": -397.2344055175781, + "step": 1438 + }, + { + "epoch": 15.147368421052631, + "grad_norm": 1.459816530768876e-06, + "learning_rate": 0.0001698736842105263, + "logits/chosen": 13.082552909851074, + "logits/rejected": 13.082552909851074, + "logps/chosen": -2674.099609375, + "logps/rejected": -2674.099609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6283874511719, + "rewards/margins": 0.0, + "rewards/rejected": -264.6283874511719, + "step": 1439 + }, + { + "epoch": 15.157894736842104, + "grad_norm": 1.1665380270642345e-06, + "learning_rate": 0.00016985263157894738, + "logits/chosen": 13.103185653686523, + "logits/rejected": 13.103185653686523, + "logps/chosen": -4001.138671875, + "logps/rejected": -4001.138671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.28656005859375, + "rewards/margins": 0.0, + "rewards/rejected": -397.28656005859375, + "step": 1440 + }, + { + "epoch": 15.16842105263158, + "grad_norm": 2.090728685288923e-06, + "learning_rate": 0.00016983157894736843, + "logits/chosen": 13.180010795593262, + "logits/rejected": 13.180010795593262, + "logps/chosen": -5173.89599609375, + "logps/rejected": -5173.89599609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.451171875, + "rewards/margins": 0.0, + "rewards/rejected": -514.451171875, + "step": 1441 + }, + { + "epoch": 15.178947368421053, + "grad_norm": 1.0571292250460829e-06, + "learning_rate": 0.00016981052631578948, + "logits/chosen": 13.112493515014648, + "logits/rejected": 13.112493515014648, + "logps/chosen": -4001.369140625, + "logps/rejected": -4001.369140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.3096008300781, + "rewards/margins": 0.0, + "rewards/rejected": -397.3096008300781, + "step": 1442 + }, + { + "epoch": 15.189473684210526, + "grad_norm": 1.5526258039244567e-06, + "learning_rate": 0.00016978947368421053, + "logits/chosen": 13.149299621582031, + "logits/rejected": 13.149299621582031, + "logps/chosen": -4324.1875, + "logps/rejected": -4324.1875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.23388671875, + "rewards/margins": 0.0, + "rewards/rejected": -429.23388671875, + "step": 1443 + }, + { + "epoch": 15.2, + "grad_norm": 1.6236004967140616e-06, + "learning_rate": 0.00016976842105263158, + "logits/chosen": 13.119844436645508, + "logits/rejected": 13.119844436645508, + "logps/chosen": -4001.435546875, + "logps/rejected": -4001.435546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.3162536621094, + "rewards/margins": 0.0, + "rewards/rejected": -397.3162536621094, + "step": 1444 + }, + { + "epoch": 15.210526315789474, + "grad_norm": 9.579836159900879e-07, + "learning_rate": 0.00016974736842105263, + "logits/chosen": 13.129072189331055, + "logits/rejected": 13.129072189331055, + "logps/chosen": -3541.03515625, + "logps/rejected": -3541.03515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.07763671875, + "rewards/margins": 0.0, + "rewards/rejected": -351.07763671875, + "step": 1445 + }, + { + "epoch": 15.221052631578948, + "grad_norm": 1.7672175545158098e-06, + "learning_rate": 0.00016972631578947368, + "logits/chosen": 13.120251655578613, + "logits/rejected": 13.120251655578613, + "logps/chosen": -4001.60546875, + "logps/rejected": -4001.60546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.3332214355469, + "rewards/margins": 0.0, + "rewards/rejected": -397.3332214355469, + "step": 1446 + }, + { + "epoch": 15.23157894736842, + "grad_norm": 1.3236641507319291e-06, + "learning_rate": 0.00016970526315789476, + "logits/chosen": 13.14944076538086, + "logits/rejected": 13.14944076538086, + "logps/chosen": -4324.9453125, + "logps/rejected": -4324.9453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.3096618652344, + "rewards/margins": 0.0, + "rewards/rejected": -429.3096618652344, + "step": 1447 + }, + { + "epoch": 15.242105263157894, + "grad_norm": 1.3325736745173344e-06, + "learning_rate": 0.0001696842105263158, + "logits/chosen": 13.112533569335938, + "logits/rejected": 13.112533569335938, + "logps/chosen": -4002.443359375, + "logps/rejected": -4002.443359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.4170227050781, + "rewards/margins": 0.0, + "rewards/rejected": -397.4170227050781, + "step": 1448 + }, + { + "epoch": 15.25263157894737, + "grad_norm": 1.0645068186931894e-06, + "learning_rate": 0.00016966315789473685, + "logits/chosen": 13.113541603088379, + "logits/rejected": 13.113541603088379, + "logps/chosen": -3758.01171875, + "logps/rejected": -3758.01171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.88287353515625, + "rewards/margins": 0.0, + "rewards/rejected": -372.88287353515625, + "step": 1449 + }, + { + "epoch": 15.263157894736842, + "grad_norm": 1.0816390840773238e-06, + "learning_rate": 0.0001696421052631579, + "logits/chosen": 13.101728439331055, + "logits/rejected": 13.101728439331055, + "logps/chosen": -2967.015625, + "logps/rejected": -2967.015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9753723144531, + "rewards/margins": 0.0, + "rewards/rejected": -293.9753723144531, + "step": 1450 + }, + { + "epoch": 15.263157894736842, + "eval_logits/chosen": 13.13160514831543, + "eval_logits/rejected": 13.13160514831543, + "eval_logps/chosen": -4309.7705078125, + "eval_logps/rejected": -4309.7705078125, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.07391357421875, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.07391357421875, + "eval_runtime": 4.2283, + "eval_samples_per_second": 2.365, + "eval_steps_per_second": 2.365, + "step": 1450 + }, + { + "epoch": 15.273684210526316, + "grad_norm": 1.2565507176987012e-06, + "learning_rate": 0.00016962105263157895, + "logits/chosen": 13.11225700378418, + "logits/rejected": 13.11225700378418, + "logps/chosen": -4289.29296875, + "logps/rejected": -4289.29296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.1319274902344, + "rewards/margins": 0.0, + "rewards/rejected": -426.1319274902344, + "step": 1451 + }, + { + "epoch": 15.284210526315789, + "grad_norm": 1.4384738733497215e-06, + "learning_rate": 0.0001696, + "logits/chosen": 13.09652328491211, + "logits/rejected": 13.09652328491211, + "logps/chosen": -3541.744140625, + "logps/rejected": -3541.744140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1485290527344, + "rewards/margins": 0.0, + "rewards/rejected": -351.1485290527344, + "step": 1452 + }, + { + "epoch": 15.294736842105262, + "grad_norm": 1.4498164091492072e-06, + "learning_rate": 0.00016957894736842105, + "logits/chosen": 13.093693733215332, + "logits/rejected": 13.093693733215332, + "logps/chosen": -3758.248046875, + "logps/rejected": -3758.248046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.906494140625, + "rewards/margins": 0.0, + "rewards/rejected": -372.906494140625, + "step": 1453 + }, + { + "epoch": 15.305263157894737, + "grad_norm": 1.8467335394234397e-06, + "learning_rate": 0.00016955789473684213, + "logits/chosen": 13.094812393188477, + "logits/rejected": 13.094812393188477, + "logps/chosen": -3775.763671875, + "logps/rejected": -3775.763671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7072448730469, + "rewards/margins": 0.0, + "rewards/rejected": -374.7072448730469, + "step": 1454 + }, + { + "epoch": 15.31578947368421, + "grad_norm": 2.647607288963627e-06, + "learning_rate": 0.00016953684210526318, + "logits/chosen": 13.159708023071289, + "logits/rejected": 13.159708023071289, + "logps/chosen": -5174.69873046875, + "logps/rejected": -5174.69873046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5314331054688, + "rewards/margins": 0.0, + "rewards/rejected": -514.5314331054688, + "step": 1455 + }, + { + "epoch": 15.326315789473684, + "grad_norm": 1.782884623935388e-06, + "learning_rate": 0.00016951578947368423, + "logits/chosen": 13.152277946472168, + "logits/rejected": 13.152277946472168, + "logps/chosen": -4878.61669921875, + "logps/rejected": -4878.61669921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0144958496094, + "rewards/margins": 0.0, + "rewards/rejected": -485.0144958496094, + "step": 1456 + }, + { + "epoch": 15.336842105263157, + "grad_norm": 1.2863453093814314e-06, + "learning_rate": 0.00016949473684210525, + "logits/chosen": 13.116669654846191, + "logits/rejected": 13.116669654846191, + "logps/chosen": -4289.73583984375, + "logps/rejected": -4289.73583984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.1762390136719, + "rewards/margins": 0.0, + "rewards/rejected": -426.1762390136719, + "step": 1457 + }, + { + "epoch": 15.347368421052632, + "grad_norm": 1.8738066955847898e-06, + "learning_rate": 0.00016947368421052633, + "logits/chosen": 13.173236846923828, + "logits/rejected": 13.173236846923828, + "logps/chosen": -5175.06787109375, + "logps/rejected": -5175.06787109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.568359375, + "rewards/margins": 0.0, + "rewards/rejected": -514.568359375, + "step": 1458 + }, + { + "epoch": 15.357894736842105, + "grad_norm": 1.9695919490914093e-06, + "learning_rate": 0.00016945263157894737, + "logits/chosen": 13.11767864227295, + "logits/rejected": 13.11767864227295, + "logps/chosen": -3776.349609375, + "logps/rejected": -3776.349609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7658386230469, + "rewards/margins": 0.0, + "rewards/rejected": -374.7658386230469, + "step": 1459 + }, + { + "epoch": 15.368421052631579, + "grad_norm": 1.5616370774296229e-06, + "learning_rate": 0.00016943157894736842, + "logits/chosen": 13.127367973327637, + "logits/rejected": 13.127367973327637, + "logps/chosen": -3543.095703125, + "logps/rejected": -3543.095703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.28369140625, + "rewards/margins": 0.0, + "rewards/rejected": -351.28369140625, + "step": 1460 + }, + { + "epoch": 15.378947368421052, + "grad_norm": 1.3086751096125226e-06, + "learning_rate": 0.00016941052631578947, + "logits/chosen": 13.191308975219727, + "logits/rejected": 13.191308975219727, + "logps/chosen": -4879.3369140625, + "logps/rejected": -4879.3369140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0865173339844, + "rewards/margins": 0.0, + "rewards/rejected": -485.0865173339844, + "step": 1461 + }, + { + "epoch": 15.389473684210527, + "grad_norm": 1.0606236173771322e-06, + "learning_rate": 0.00016938947368421055, + "logits/chosen": 13.142932891845703, + "logits/rejected": 13.142932891845703, + "logps/chosen": -4000.73046875, + "logps/rejected": -4000.73046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.2457275390625, + "rewards/margins": 0.0, + "rewards/rejected": -397.2457275390625, + "step": 1462 + }, + { + "epoch": 15.4, + "grad_norm": 9.446542890145793e-07, + "learning_rate": 0.00016936842105263157, + "logits/chosen": 13.138042449951172, + "logits/rejected": 13.138042449951172, + "logps/chosen": -2673.4072265625, + "logps/rejected": -2673.4072265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.55914306640625, + "rewards/margins": 0.0, + "rewards/rejected": -264.55914306640625, + "step": 1463 + }, + { + "epoch": 15.410526315789474, + "grad_norm": 2.5797266971494537e-06, + "learning_rate": 0.00016934736842105262, + "logits/chosen": 13.229494094848633, + "logits/rejected": 13.229494094848633, + "logps/chosen": -5176.02880859375, + "logps/rejected": -5176.02880859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6644897460938, + "rewards/margins": 0.0, + "rewards/rejected": -514.6644897460938, + "step": 1464 + }, + { + "epoch": 15.421052631578947, + "grad_norm": 1.0844455573533196e-06, + "learning_rate": 0.0001693263157894737, + "logits/chosen": 13.173554420471191, + "logits/rejected": 13.173554420471191, + "logps/chosen": -3759.541015625, + "logps/rejected": -3759.541015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -373.0357971191406, + "rewards/margins": 0.0, + "rewards/rejected": -373.0357971191406, + "step": 1465 + }, + { + "epoch": 15.431578947368422, + "grad_norm": 1.138741708928137e-06, + "learning_rate": 0.00016930526315789475, + "logits/chosen": 13.177787780761719, + "logits/rejected": 13.177787780761719, + "logps/chosen": -3759.552734375, + "logps/rejected": -3759.552734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -373.0369567871094, + "rewards/margins": 0.0, + "rewards/rejected": -373.0369567871094, + "step": 1466 + }, + { + "epoch": 15.442105263157895, + "grad_norm": 2.0837719603150617e-06, + "learning_rate": 0.0001692842105263158, + "logits/chosen": 13.241813659667969, + "logits/rejected": 13.241813659667969, + "logps/chosen": -5176.51611328125, + "logps/rejected": -5176.51611328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7131958007812, + "rewards/margins": 0.0, + "rewards/rejected": -514.7131958007812, + "step": 1467 + }, + { + "epoch": 15.452631578947368, + "grad_norm": 1.546308453725942e-06, + "learning_rate": 0.00016926315789473684, + "logits/chosen": 13.185162544250488, + "logits/rejected": 13.185162544250488, + "logps/chosen": -3778.318359375, + "logps/rejected": -3778.318359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9627380371094, + "rewards/margins": 0.0, + "rewards/rejected": -374.9627380371094, + "step": 1468 + }, + { + "epoch": 15.463157894736842, + "grad_norm": 1.0050520131699159e-06, + "learning_rate": 0.00016924210526315792, + "logits/chosen": 13.16478443145752, + "logits/rejected": 13.16478443145752, + "logps/chosen": -2673.3095703125, + "logps/rejected": -2673.3095703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.54937744140625, + "rewards/margins": 0.0, + "rewards/rejected": -264.54937744140625, + "step": 1469 + }, + { + "epoch": 15.473684210526315, + "grad_norm": 1.3186632941142307e-06, + "learning_rate": 0.00016922105263157894, + "logits/chosen": 13.191641807556152, + "logits/rejected": 13.191641807556152, + "logps/chosen": -3543.771484375, + "logps/rejected": -3543.771484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3512878417969, + "rewards/margins": 0.0, + "rewards/rejected": -351.3512878417969, + "step": 1470 + }, + { + "epoch": 15.48421052631579, + "grad_norm": 1.375670763081871e-06, + "learning_rate": 0.0001692, + "logits/chosen": 13.245780944824219, + "logits/rejected": 13.245780944824219, + "logps/chosen": -4880.0185546875, + "logps/rejected": -4880.0185546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1546936035156, + "rewards/margins": 0.0, + "rewards/rejected": -485.1546936035156, + "step": 1471 + }, + { + "epoch": 15.494736842105263, + "grad_norm": 9.17223019314406e-07, + "learning_rate": 0.00016917894736842107, + "logits/chosen": 13.180889129638672, + "logits/rejected": 13.180889129638672, + "logps/chosen": -2673.4375, + "logps/rejected": -2673.4375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5621643066406, + "rewards/margins": 0.0, + "rewards/rejected": -264.5621643066406, + "step": 1472 + }, + { + "epoch": 15.505263157894737, + "grad_norm": 9.388008948008064e-07, + "learning_rate": 0.00016915789473684212, + "logits/chosen": 13.20954704284668, + "logits/rejected": 13.20954704284668, + "logps/chosen": -3544.189453125, + "logps/rejected": -3544.189453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.39306640625, + "rewards/margins": 0.0, + "rewards/rejected": -351.39306640625, + "step": 1473 + }, + { + "epoch": 15.51578947368421, + "grad_norm": 8.480238875563373e-07, + "learning_rate": 0.00016913684210526317, + "logits/chosen": 13.216889381408691, + "logits/rejected": 13.216889381408691, + "logps/chosen": -3544.5859375, + "logps/rejected": -3544.5859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4327087402344, + "rewards/margins": 0.0, + "rewards/rejected": -351.4327087402344, + "step": 1474 + }, + { + "epoch": 15.526315789473685, + "grad_norm": 8.220295057981275e-07, + "learning_rate": 0.00016911578947368422, + "logits/chosen": 13.203375816345215, + "logits/rejected": 13.203375816345215, + "logps/chosen": -2674.0849609375, + "logps/rejected": -2674.0849609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6269226074219, + "rewards/margins": 0.0, + "rewards/rejected": -264.6269226074219, + "step": 1475 + }, + { + "epoch": 15.536842105263158, + "grad_norm": 7.893989391050127e-07, + "learning_rate": 0.00016909473684210527, + "logits/chosen": 13.233211517333984, + "logits/rejected": 13.233211517333984, + "logps/chosen": -3544.9033203125, + "logps/rejected": -3544.9033203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4644470214844, + "rewards/margins": 0.0, + "rewards/rejected": -351.4644470214844, + "step": 1476 + }, + { + "epoch": 15.547368421052632, + "grad_norm": 2.275001406815136e-06, + "learning_rate": 0.00016907368421052632, + "logits/chosen": 13.230999946594238, + "logits/rejected": 13.230999946594238, + "logps/chosen": -3996.65234375, + "logps/rejected": -3996.65234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8379211425781, + "rewards/margins": 0.0, + "rewards/rejected": -396.8379211425781, + "step": 1477 + }, + { + "epoch": 15.557894736842105, + "grad_norm": 8.026908631109109e-07, + "learning_rate": 0.00016905263157894736, + "logits/chosen": 13.231245040893555, + "logits/rejected": 13.231245040893555, + "logps/chosen": -2968.416015625, + "logps/rejected": -2968.416015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.11541748046875, + "rewards/margins": 0.0, + "rewards/rejected": -294.11541748046875, + "step": 1478 + }, + { + "epoch": 15.568421052631578, + "grad_norm": 7.65748609410366e-07, + "learning_rate": 0.00016903157894736844, + "logits/chosen": 13.237869262695312, + "logits/rejected": 13.237869262695312, + "logps/chosen": -3545.455078125, + "logps/rejected": -3545.455078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5196228027344, + "rewards/margins": 0.0, + "rewards/rejected": -351.5196228027344, + "step": 1479 + }, + { + "epoch": 15.578947368421053, + "grad_norm": 1.9207375316909747e-06, + "learning_rate": 0.0001690105263157895, + "logits/chosen": 13.28357982635498, + "logits/rejected": 13.28357982635498, + "logps/chosen": -4879.654296875, + "logps/rejected": -4879.654296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1182556152344, + "rewards/margins": 0.0, + "rewards/rejected": -485.1182556152344, + "step": 1480 + }, + { + "epoch": 15.589473684210526, + "grad_norm": 8.145337915266282e-07, + "learning_rate": 0.00016898947368421054, + "logits/chosen": 13.211501121520996, + "logits/rejected": 13.211501121520996, + "logps/chosen": -2674.8994140625, + "logps/rejected": -2674.8994140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.7083435058594, + "rewards/margins": 0.0, + "rewards/rejected": -264.7083435058594, + "step": 1481 + }, + { + "epoch": 15.6, + "grad_norm": 1.0985201015500934e-06, + "learning_rate": 0.0001689684210526316, + "logits/chosen": 13.23165225982666, + "logits/rejected": 13.23165225982666, + "logps/chosen": -3545.8486328125, + "logps/rejected": -3545.8486328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5589904785156, + "rewards/margins": 0.0, + "rewards/rejected": -351.5589904785156, + "step": 1482 + }, + { + "epoch": 15.610526315789473, + "grad_norm": 1.3010347856834414e-06, + "learning_rate": 0.00016894736842105264, + "logits/chosen": 13.229446411132812, + "logits/rejected": 13.229446411132812, + "logps/chosen": -3759.5693359375, + "logps/rejected": -3759.5693359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -373.03863525390625, + "rewards/margins": 0.0, + "rewards/rejected": -373.03863525390625, + "step": 1483 + }, + { + "epoch": 15.621052631578948, + "grad_norm": 1.4617021406593267e-06, + "learning_rate": 0.0001689263157894737, + "logits/chosen": 13.28757381439209, + "logits/rejected": 13.28757381439209, + "logps/chosen": -5177.04248046875, + "logps/rejected": -5177.04248046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7658081054688, + "rewards/margins": 0.0, + "rewards/rejected": -514.7658081054688, + "step": 1484 + }, + { + "epoch": 15.631578947368421, + "grad_norm": 1.2254095054231584e-06, + "learning_rate": 0.00016890526315789474, + "logits/chosen": 13.277099609375, + "logits/rejected": 13.277099609375, + "logps/chosen": -4879.13427734375, + "logps/rejected": -4879.13427734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0662536621094, + "rewards/margins": 0.0, + "rewards/rejected": -485.0662536621094, + "step": 1485 + }, + { + "epoch": 15.642105263157895, + "grad_norm": 1.277786623177235e-06, + "learning_rate": 0.0001688842105263158, + "logits/chosen": 13.277246475219727, + "logits/rejected": 13.277246475219727, + "logps/chosen": -4879.3466796875, + "logps/rejected": -4879.3466796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0874938964844, + "rewards/margins": 0.0, + "rewards/rejected": -485.0874938964844, + "step": 1486 + }, + { + "epoch": 15.652631578947368, + "grad_norm": 1.4210424978955416e-06, + "learning_rate": 0.00016886315789473686, + "logits/chosen": 13.219776153564453, + "logits/rejected": 13.219776153564453, + "logps/chosen": -2968.2109375, + "logps/rejected": -2968.2109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.09490966796875, + "rewards/margins": 0.0, + "rewards/rejected": -294.09490966796875, + "step": 1487 + }, + { + "epoch": 15.663157894736843, + "grad_norm": 1.5450937098648865e-06, + "learning_rate": 0.0001688421052631579, + "logits/chosen": 13.233162879943848, + "logits/rejected": 13.233162879943848, + "logps/chosen": -3778.689453125, + "logps/rejected": -3778.689453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9998474121094, + "rewards/margins": 0.0, + "rewards/rejected": -374.9998474121094, + "step": 1488 + }, + { + "epoch": 15.673684210526316, + "grad_norm": 1.0246831152471714e-06, + "learning_rate": 0.00016882105263157893, + "logits/chosen": 13.235774040222168, + "logits/rejected": 13.235774040222168, + "logps/chosen": -3546.1328125, + "logps/rejected": -3546.1328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.58740234375, + "rewards/margins": 0.0, + "rewards/rejected": -351.58740234375, + "step": 1489 + }, + { + "epoch": 15.68421052631579, + "grad_norm": 8.195901841645536e-07, + "learning_rate": 0.0001688, + "logits/chosen": 13.241355895996094, + "logits/rejected": 13.241355895996094, + "logps/chosen": -3546.193359375, + "logps/rejected": -3546.193359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5934753417969, + "rewards/margins": 0.0, + "rewards/rejected": -351.5934753417969, + "step": 1490 + }, + { + "epoch": 15.694736842105263, + "grad_norm": 3.489791424726718e-06, + "learning_rate": 0.00016877894736842106, + "logits/chosen": 13.30412769317627, + "logits/rejected": 13.30412769317627, + "logps/chosen": -5176.5517578125, + "logps/rejected": -5176.5517578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7167358398438, + "rewards/margins": 0.0, + "rewards/rejected": -514.7167358398438, + "step": 1491 + }, + { + "epoch": 15.705263157894738, + "grad_norm": 1.3516136050384375e-06, + "learning_rate": 0.0001687578947368421, + "logits/chosen": 13.275618553161621, + "logits/rejected": 13.275618553161621, + "logps/chosen": -4326.9931640625, + "logps/rejected": -4326.9931640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.51446533203125, + "rewards/margins": 0.0, + "rewards/rejected": -429.51446533203125, + "step": 1492 + }, + { + "epoch": 15.715789473684211, + "grad_norm": 1.6686075241523213e-06, + "learning_rate": 0.00016873684210526316, + "logits/chosen": 13.245379447937012, + "logits/rejected": 13.245379447937012, + "logps/chosen": -3995.63671875, + "logps/rejected": -3995.63671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.7363586425781, + "rewards/margins": 0.0, + "rewards/rejected": -396.7363586425781, + "step": 1493 + }, + { + "epoch": 15.726315789473684, + "grad_norm": 2.5728875243657967e-06, + "learning_rate": 0.00016871578947368423, + "logits/chosen": 13.30986213684082, + "logits/rejected": 13.30986213684082, + "logps/chosen": -5177.07421875, + "logps/rejected": -5177.07421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7689819335938, + "rewards/margins": 0.0, + "rewards/rejected": -514.7689819335938, + "step": 1494 + }, + { + "epoch": 15.736842105263158, + "grad_norm": 1.4438797961702221e-06, + "learning_rate": 0.00016869473684210526, + "logits/chosen": 13.261364936828613, + "logits/rejected": 13.261364936828613, + "logps/chosen": -4287.255859375, + "logps/rejected": -4287.255859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.92822265625, + "rewards/margins": 0.0, + "rewards/rejected": -425.92822265625, + "step": 1495 + }, + { + "epoch": 15.74736842105263, + "grad_norm": 1.357111841571168e-06, + "learning_rate": 0.0001686736842105263, + "logits/chosen": 13.235664367675781, + "logits/rejected": 13.235664367675781, + "logps/chosen": -3995.97265625, + "logps/rejected": -3995.97265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.76995849609375, + "rewards/margins": 0.0, + "rewards/rejected": -396.76995849609375, + "step": 1496 + }, + { + "epoch": 15.757894736842106, + "grad_norm": 1.9852880086546065e-06, + "learning_rate": 0.00016865263157894738, + "logits/chosen": 13.28403091430664, + "logits/rejected": 13.28403091430664, + "logps/chosen": -4880.365234375, + "logps/rejected": -4880.365234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1893615722656, + "rewards/margins": 0.0, + "rewards/rejected": -485.1893615722656, + "step": 1497 + }, + { + "epoch": 15.76842105263158, + "grad_norm": 1.8101868590747472e-06, + "learning_rate": 0.00016863157894736843, + "logits/chosen": 13.219038009643555, + "logits/rejected": 13.219038009643555, + "logps/chosen": -3996.298828125, + "logps/rejected": -3996.298828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8025817871094, + "rewards/margins": 0.0, + "rewards/rejected": -396.8025817871094, + "step": 1498 + }, + { + "epoch": 15.778947368421052, + "grad_norm": 2.113829168592929e-06, + "learning_rate": 0.00016861052631578948, + "logits/chosen": 13.209192276000977, + "logits/rejected": 13.209192276000977, + "logps/chosen": -2967.923828125, + "logps/rejected": -2967.923828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0661926269531, + "rewards/margins": 0.0, + "rewards/rejected": -294.0661926269531, + "step": 1499 + }, + { + "epoch": 15.789473684210526, + "grad_norm": 1.437405785509327e-06, + "learning_rate": 0.00016858947368421053, + "logits/chosen": 13.205979347229004, + "logits/rejected": 13.205979347229004, + "logps/chosen": -3997.12890625, + "logps/rejected": -3997.12890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8855895996094, + "rewards/margins": 0.0, + "rewards/rejected": -396.8855895996094, + "step": 1500 + }, + { + "epoch": 15.789473684210526, + "eval_logits/chosen": 13.236076354980469, + "eval_logits/rejected": 13.236076354980469, + "eval_logps/chosen": -4311.6533203125, + "eval_logps/rejected": -4311.6533203125, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.26220703125, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.26220703125, + "eval_runtime": 4.5412, + "eval_samples_per_second": 2.202, + "eval_steps_per_second": 2.202, + "step": 1500 + }, + { + "epoch": 15.8, + "grad_norm": 1.4228317013476044e-06, + "learning_rate": 0.0001685684210526316, + "logits/chosen": 13.210309982299805, + "logits/rejected": 13.210309982299805, + "logps/chosen": -3545.455078125, + "logps/rejected": -3545.455078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5196228027344, + "rewards/margins": 0.0, + "rewards/rejected": -351.5196228027344, + "step": 1501 + }, + { + "epoch": 15.810526315789474, + "grad_norm": 1.2206730843900004e-06, + "learning_rate": 0.00016854736842105263, + "logits/chosen": 13.196483612060547, + "logits/rejected": 13.196483612060547, + "logps/chosen": -3998.130859375, + "logps/rejected": -3998.130859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.98577880859375, + "rewards/margins": 0.0, + "rewards/rejected": -396.98577880859375, + "step": 1502 + }, + { + "epoch": 15.821052631578947, + "grad_norm": 1.321790136898926e-06, + "learning_rate": 0.00016852631578947368, + "logits/chosen": 13.20278549194336, + "logits/rejected": 13.20278549194336, + "logps/chosen": -3778.0625, + "logps/rejected": -3778.0625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9371337890625, + "rewards/margins": 0.0, + "rewards/rejected": -374.9371337890625, + "step": 1503 + }, + { + "epoch": 15.83157894736842, + "grad_norm": 8.005339964256564e-07, + "learning_rate": 0.00016850526315789475, + "logits/chosen": 13.18626880645752, + "logits/rejected": 13.18626880645752, + "logps/chosen": -2968.583984375, + "logps/rejected": -2968.583984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1322021484375, + "rewards/margins": 0.0, + "rewards/rejected": -294.1322021484375, + "step": 1504 + }, + { + "epoch": 15.842105263157894, + "grad_norm": 9.207843163494545e-07, + "learning_rate": 0.0001684842105263158, + "logits/chosen": 13.168615341186523, + "logits/rejected": 13.168615341186523, + "logps/chosen": -2673.6474609375, + "logps/rejected": -2673.6474609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5831604003906, + "rewards/margins": 0.0, + "rewards/rejected": -264.5831604003906, + "step": 1505 + }, + { + "epoch": 15.852631578947369, + "grad_norm": 3.915900833817432e-06, + "learning_rate": 0.00016846315789473685, + "logits/chosen": 13.228930473327637, + "logits/rejected": 13.228930473327637, + "logps/chosen": -4879.8671875, + "logps/rejected": -4879.8671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1395568847656, + "rewards/margins": 0.0, + "rewards/rejected": -485.1395568847656, + "step": 1506 + }, + { + "epoch": 15.863157894736842, + "grad_norm": 7.730230322522402e-07, + "learning_rate": 0.0001684421052631579, + "logits/chosen": 13.165517807006836, + "logits/rejected": 13.165517807006836, + "logps/chosen": -2968.626953125, + "logps/rejected": -2968.626953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1365051269531, + "rewards/margins": 0.0, + "rewards/rejected": -294.1365051269531, + "step": 1507 + }, + { + "epoch": 15.873684210526315, + "grad_norm": 1.2821412838093238e-06, + "learning_rate": 0.00016842105263157895, + "logits/chosen": 13.167649269104004, + "logits/rejected": 13.167649269104004, + "logps/chosen": -3778.802734375, + "logps/rejected": -3778.802734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.01116943359375, + "rewards/margins": 0.0, + "rewards/rejected": -375.01116943359375, + "step": 1508 + }, + { + "epoch": 15.884210526315789, + "grad_norm": 1.4347260730573907e-06, + "learning_rate": 0.0001684, + "logits/chosen": 13.203887939453125, + "logits/rejected": 13.203887939453125, + "logps/chosen": -4880.7763671875, + "logps/rejected": -4880.7763671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.23046875, + "rewards/margins": 0.0, + "rewards/rejected": -485.23046875, + "step": 1509 + }, + { + "epoch": 15.894736842105264, + "grad_norm": 1.2377859093248844e-06, + "learning_rate": 0.00016837894736842105, + "logits/chosen": 13.199368476867676, + "logits/rejected": 13.199368476867676, + "logps/chosen": -4880.734375, + "logps/rejected": -4880.734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2262878417969, + "rewards/margins": 0.0, + "rewards/rejected": -485.2262878417969, + "step": 1510 + }, + { + "epoch": 15.905263157894737, + "grad_norm": 1.5752422086734441e-06, + "learning_rate": 0.00016835789473684213, + "logits/chosen": 13.195993423461914, + "logits/rejected": 13.195993423461914, + "logps/chosen": -4880.8310546875, + "logps/rejected": -4880.8310546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2359313964844, + "rewards/margins": 0.0, + "rewards/rejected": -485.2359313964844, + "step": 1511 + }, + { + "epoch": 15.91578947368421, + "grad_norm": 2.0205939108564053e-06, + "learning_rate": 0.00016833684210526318, + "logits/chosen": 13.194650650024414, + "logits/rejected": 13.194650650024414, + "logps/chosen": -4881.19775390625, + "logps/rejected": -4881.19775390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2726135253906, + "rewards/margins": 0.0, + "rewards/rejected": -485.2726135253906, + "step": 1512 + }, + { + "epoch": 15.926315789473684, + "grad_norm": 4.246265234542079e-06, + "learning_rate": 0.00016831578947368422, + "logits/chosen": 13.16921615600586, + "logits/rejected": 13.16921615600586, + "logps/chosen": -4324.853515625, + "logps/rejected": -4324.853515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.3005065917969, + "rewards/margins": 0.0, + "rewards/rejected": -429.3005065917969, + "step": 1513 + }, + { + "epoch": 15.936842105263159, + "grad_norm": 1.3122472637405735e-06, + "learning_rate": 0.00016829473684210527, + "logits/chosen": 13.204113006591797, + "logits/rejected": 13.204113006591797, + "logps/chosen": -4882.04345703125, + "logps/rejected": -4882.04345703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.357177734375, + "rewards/margins": 0.0, + "rewards/rejected": -485.357177734375, + "step": 1514 + }, + { + "epoch": 15.947368421052632, + "grad_norm": 1.4962099612603197e-06, + "learning_rate": 0.00016827368421052632, + "logits/chosen": 13.176742553710938, + "logits/rejected": 13.176742553710938, + "logps/chosen": -4288.4716796875, + "logps/rejected": -4288.4716796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0498046875, + "rewards/margins": 0.0, + "rewards/rejected": -426.0498046875, + "step": 1515 + }, + { + "epoch": 15.957894736842105, + "grad_norm": 2.5174356323987013e-06, + "learning_rate": 0.00016825263157894737, + "logits/chosen": 13.22823429107666, + "logits/rejected": 13.22823429107666, + "logps/chosen": -5173.9306640625, + "logps/rejected": -5173.9306640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4546508789062, + "rewards/margins": 0.0, + "rewards/rejected": -514.4546508789062, + "step": 1516 + }, + { + "epoch": 15.968421052631578, + "grad_norm": 1.5843913843127666e-06, + "learning_rate": 0.00016823157894736842, + "logits/chosen": 13.20238208770752, + "logits/rejected": 13.20238208770752, + "logps/chosen": -4325.05859375, + "logps/rejected": -4325.05859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.3210144042969, + "rewards/margins": 0.0, + "rewards/rejected": -429.3210144042969, + "step": 1517 + }, + { + "epoch": 15.978947368421053, + "grad_norm": 1.2926406043334282e-06, + "learning_rate": 0.0001682105263157895, + "logits/chosen": 13.189013481140137, + "logits/rejected": 13.189013481140137, + "logps/chosen": -3780.138671875, + "logps/rejected": -3780.138671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.1447448730469, + "rewards/margins": 0.0, + "rewards/rejected": -375.1447448730469, + "step": 1518 + }, + { + "epoch": 15.989473684210527, + "grad_norm": 9.168121550828801e-07, + "learning_rate": 0.00016818947368421055, + "logits/chosen": 13.193624496459961, + "logits/rejected": 13.193624496459961, + "logps/chosen": -3542.181640625, + "logps/rejected": -3542.181640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1922912597656, + "rewards/margins": 0.0, + "rewards/rejected": -351.1922912597656, + "step": 1519 + }, + { + "epoch": 16.0, + "grad_norm": 3.7533593513217056e-06, + "learning_rate": 0.0001681684210526316, + "logits/chosen": 13.256009101867676, + "logits/rejected": 13.256009101867676, + "logps/chosen": -5174.01318359375, + "logps/rejected": -5174.01318359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.462890625, + "rewards/margins": 0.0, + "rewards/rejected": -514.462890625, + "step": 1520 + }, + { + "epoch": 16.010526315789473, + "grad_norm": 9.518960837340273e-07, + "learning_rate": 0.00016814736842105262, + "logits/chosen": 13.205791473388672, + "logits/rejected": 13.205791473388672, + "logps/chosen": -3542.416015625, + "logps/rejected": -3542.416015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2157287597656, + "rewards/margins": 0.0, + "rewards/rejected": -351.2157287597656, + "step": 1521 + }, + { + "epoch": 16.021052631578947, + "grad_norm": 1.2076296798113617e-06, + "learning_rate": 0.0001681263157894737, + "logits/chosen": 13.201488494873047, + "logits/rejected": 13.201488494873047, + "logps/chosen": -3999.0078125, + "logps/rejected": -3999.0078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0734558105469, + "rewards/margins": 0.0, + "rewards/rejected": -397.0734558105469, + "step": 1522 + }, + { + "epoch": 16.03157894736842, + "grad_norm": 1.3971101679999265e-06, + "learning_rate": 0.00016810526315789474, + "logits/chosen": 13.22538948059082, + "logits/rejected": 13.22538948059082, + "logps/chosen": -4288.8740234375, + "logps/rejected": -4288.8740234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0900573730469, + "rewards/margins": 0.0, + "rewards/rejected": -426.0900573730469, + "step": 1523 + }, + { + "epoch": 16.042105263157893, + "grad_norm": 1.7650563677307218e-06, + "learning_rate": 0.0001680842105263158, + "logits/chosen": 13.214360237121582, + "logits/rejected": 13.214360237121582, + "logps/chosen": -3542.40625, + "logps/rejected": -3542.40625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2147521972656, + "rewards/margins": 0.0, + "rewards/rejected": -351.2147521972656, + "step": 1524 + }, + { + "epoch": 16.05263157894737, + "grad_norm": 1.375620968246949e-06, + "learning_rate": 0.00016806315789473684, + "logits/chosen": 13.206888198852539, + "logits/rejected": 13.206888198852539, + "logps/chosen": -3998.708984375, + "logps/rejected": -3998.708984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0435791015625, + "rewards/margins": 0.0, + "rewards/rejected": -397.0435791015625, + "step": 1525 + }, + { + "epoch": 16.063157894736843, + "grad_norm": 1.6022536328819115e-06, + "learning_rate": 0.00016804210526315792, + "logits/chosen": 13.264443397521973, + "logits/rejected": 13.264443397521973, + "logps/chosen": -4882.97412109375, + "logps/rejected": -4882.97412109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.45025634765625, + "rewards/margins": 0.0, + "rewards/rejected": -485.45025634765625, + "step": 1526 + }, + { + "epoch": 16.073684210526316, + "grad_norm": 1.1701546327458345e-06, + "learning_rate": 0.00016802105263157894, + "logits/chosen": 13.192530632019043, + "logits/rejected": 13.192530632019043, + "logps/chosen": -2672.140625, + "logps/rejected": -2672.140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4324645996094, + "rewards/margins": 0.0, + "rewards/rejected": -264.4324645996094, + "step": 1527 + }, + { + "epoch": 16.08421052631579, + "grad_norm": 1.153848984358774e-06, + "learning_rate": 0.000168, + "logits/chosen": 13.205900192260742, + "logits/rejected": 13.205900192260742, + "logps/chosen": -3999.07421875, + "logps/rejected": -3999.07421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0801086425781, + "rewards/margins": 0.0, + "rewards/rejected": -397.0801086425781, + "step": 1528 + }, + { + "epoch": 16.094736842105263, + "grad_norm": 1.0798538596645813e-06, + "learning_rate": 0.00016797894736842107, + "logits/chosen": 13.26051139831543, + "logits/rejected": 13.26051139831543, + "logps/chosen": -4882.5927734375, + "logps/rejected": -4882.5927734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.412109375, + "rewards/margins": 0.0, + "rewards/rejected": -485.412109375, + "step": 1529 + }, + { + "epoch": 16.105263157894736, + "grad_norm": 1.3760369483861723e-06, + "learning_rate": 0.00016795789473684212, + "logits/chosen": 13.1987886428833, + "logits/rejected": 13.1987886428833, + "logps/chosen": -2967.771484375, + "logps/rejected": -2967.771484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.05096435546875, + "rewards/margins": 0.0, + "rewards/rejected": -294.05096435546875, + "step": 1530 + }, + { + "epoch": 16.11578947368421, + "grad_norm": 1.2300752132432535e-06, + "learning_rate": 0.00016793684210526317, + "logits/chosen": 13.212223052978516, + "logits/rejected": 13.212223052978516, + "logps/chosen": -3779.744140625, + "logps/rejected": -3779.744140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.1053161621094, + "rewards/margins": 0.0, + "rewards/rejected": -375.1053161621094, + "step": 1531 + }, + { + "epoch": 16.126315789473683, + "grad_norm": 1.8733865090325708e-06, + "learning_rate": 0.00016791578947368421, + "logits/chosen": 13.219634056091309, + "logits/rejected": 13.219634056091309, + "logps/chosen": -4289.3134765625, + "logps/rejected": -4289.3134765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.1340026855469, + "rewards/margins": 0.0, + "rewards/rejected": -426.1340026855469, + "step": 1532 + }, + { + "epoch": 16.13684210526316, + "grad_norm": 1.3331396075955126e-06, + "learning_rate": 0.00016789473684210526, + "logits/chosen": 13.207313537597656, + "logits/rejected": 13.207313537597656, + "logps/chosen": -3779.89453125, + "logps/rejected": -3779.89453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.1203308105469, + "rewards/margins": 0.0, + "rewards/rejected": -375.1203308105469, + "step": 1533 + }, + { + "epoch": 16.147368421052633, + "grad_norm": 1.3025571661273716e-06, + "learning_rate": 0.0001678736842105263, + "logits/chosen": 13.223761558532715, + "logits/rejected": 13.223761558532715, + "logps/chosen": -4326.466796875, + "logps/rejected": -4326.466796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4618225097656, + "rewards/margins": 0.0, + "rewards/rejected": -429.4618225097656, + "step": 1534 + }, + { + "epoch": 16.157894736842106, + "grad_norm": 8.950647725214367e-07, + "learning_rate": 0.00016785263157894736, + "logits/chosen": 13.197572708129883, + "logits/rejected": 13.197572708129883, + "logps/chosen": -3542.939453125, + "logps/rejected": -3542.939453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.26806640625, + "rewards/margins": 0.0, + "rewards/rejected": -351.26806640625, + "step": 1535 + }, + { + "epoch": 16.16842105263158, + "grad_norm": 1.7535032839077758e-06, + "learning_rate": 0.00016783157894736844, + "logits/chosen": 13.184588432312012, + "logits/rejected": 13.184588432312012, + "logps/chosen": -3999.541015625, + "logps/rejected": -3999.541015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1268005371094, + "rewards/margins": 0.0, + "rewards/rejected": -397.1268005371094, + "step": 1536 + }, + { + "epoch": 16.178947368421053, + "grad_norm": 9.275355523641338e-07, + "learning_rate": 0.0001678105263157895, + "logits/chosen": 13.188555717468262, + "logits/rejected": 13.188555717468262, + "logps/chosen": -3543.091796875, + "logps/rejected": -3543.091796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2832946777344, + "rewards/margins": 0.0, + "rewards/rejected": -351.2832946777344, + "step": 1537 + }, + { + "epoch": 16.189473684210526, + "grad_norm": 9.93465391729842e-07, + "learning_rate": 0.00016778947368421054, + "logits/chosen": 13.184117317199707, + "logits/rejected": 13.184117317199707, + "logps/chosen": -3543.158203125, + "logps/rejected": -3543.158203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2899475097656, + "rewards/margins": 0.0, + "rewards/rejected": -351.2899475097656, + "step": 1538 + }, + { + "epoch": 16.2, + "grad_norm": 1.221544380314299e-06, + "learning_rate": 0.0001677684210526316, + "logits/chosen": 13.18503475189209, + "logits/rejected": 13.18503475189209, + "logps/chosen": -3781.2265625, + "logps/rejected": -3781.2265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.2535400390625, + "rewards/margins": 0.0, + "rewards/rejected": -375.2535400390625, + "step": 1539 + }, + { + "epoch": 16.210526315789473, + "grad_norm": 1.1523267176016816e-06, + "learning_rate": 0.00016774736842105264, + "logits/chosen": 13.169259071350098, + "logits/rejected": 13.169259071350098, + "logps/chosen": -3999.501953125, + "logps/rejected": -3999.501953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1228942871094, + "rewards/margins": 0.0, + "rewards/rejected": -397.1228942871094, + "step": 1540 + }, + { + "epoch": 16.221052631578946, + "grad_norm": 1.2691651818386163e-06, + "learning_rate": 0.00016772631578947369, + "logits/chosen": 13.178340911865234, + "logits/rejected": 13.178340911865234, + "logps/chosen": -3781.39453125, + "logps/rejected": -3781.39453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.2703552246094, + "rewards/margins": 0.0, + "rewards/rejected": -375.2703552246094, + "step": 1541 + }, + { + "epoch": 16.231578947368423, + "grad_norm": 9.442172768103774e-07, + "learning_rate": 0.00016770526315789473, + "logits/chosen": 13.14732837677002, + "logits/rejected": 13.14732837677002, + "logps/chosen": -2673.548828125, + "logps/rejected": -2673.548828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.57330322265625, + "rewards/margins": 0.0, + "rewards/rejected": -264.57330322265625, + "step": 1542 + }, + { + "epoch": 16.242105263157896, + "grad_norm": 1.3646114211951499e-06, + "learning_rate": 0.0001676842105263158, + "logits/chosen": 13.164592742919922, + "logits/rejected": 13.164592742919922, + "logps/chosen": -3757.373046875, + "logps/rejected": -3757.373046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8190002441406, + "rewards/margins": 0.0, + "rewards/rejected": -372.8190002441406, + "step": 1543 + }, + { + "epoch": 16.25263157894737, + "grad_norm": 2.3008640255284263e-06, + "learning_rate": 0.00016766315789473686, + "logits/chosen": 13.185821533203125, + "logits/rejected": 13.185821533203125, + "logps/chosen": -4327.1298828125, + "logps/rejected": -4327.1298828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.52813720703125, + "rewards/margins": 0.0, + "rewards/rejected": -429.52813720703125, + "step": 1544 + }, + { + "epoch": 16.263157894736842, + "grad_norm": 2.661973439899157e-06, + "learning_rate": 0.0001676421052631579, + "logits/chosen": 13.220733642578125, + "logits/rejected": 13.220733642578125, + "logps/chosen": -5172.6591796875, + "logps/rejected": -5172.6591796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3275146484375, + "rewards/margins": 0.0, + "rewards/rejected": -514.3275146484375, + "step": 1545 + }, + { + "epoch": 16.273684210526316, + "grad_norm": 1.3652403367814259e-06, + "learning_rate": 0.00016762105263157896, + "logits/chosen": 13.153447151184082, + "logits/rejected": 13.153447151184082, + "logps/chosen": -3999.9140625, + "logps/rejected": -3999.9140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1640930175781, + "rewards/margins": 0.0, + "rewards/rejected": -397.1640930175781, + "step": 1546 + }, + { + "epoch": 16.28421052631579, + "grad_norm": 1.607982198947866e-06, + "learning_rate": 0.0001676, + "logits/chosen": 13.17337417602539, + "logits/rejected": 13.17337417602539, + "logps/chosen": -4290.24560546875, + "logps/rejected": -4290.24560546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.2272033691406, + "rewards/margins": 0.0, + "rewards/rejected": -426.2272033691406, + "step": 1547 + }, + { + "epoch": 16.294736842105262, + "grad_norm": 1.1096141179223196e-06, + "learning_rate": 0.00016757894736842106, + "logits/chosen": 13.156723976135254, + "logits/rejected": 13.156723976135254, + "logps/chosen": -3757.8818359375, + "logps/rejected": -3757.8818359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.869873046875, + "rewards/margins": 0.0, + "rewards/rejected": -372.869873046875, + "step": 1548 + }, + { + "epoch": 16.305263157894736, + "grad_norm": 1.1814041727120639e-06, + "learning_rate": 0.0001675578947368421, + "logits/chosen": 13.151932716369629, + "logits/rejected": 13.151932716369629, + "logps/chosen": -3757.9375, + "logps/rejected": -3757.9375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8754577636719, + "rewards/margins": 0.0, + "rewards/rejected": -372.8754577636719, + "step": 1549 + }, + { + "epoch": 16.31578947368421, + "grad_norm": 2.057674237221363e-06, + "learning_rate": 0.00016753684210526318, + "logits/chosen": 13.206674575805664, + "logits/rejected": 13.206674575805664, + "logps/chosen": -5172.7880859375, + "logps/rejected": -5172.7880859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3403930664062, + "rewards/margins": 0.0, + "rewards/rejected": -514.3403930664062, + "step": 1550 + }, + { + "epoch": 16.31578947368421, + "eval_logits/chosen": 13.176271438598633, + "eval_logits/rejected": 13.176271438598633, + "eval_logps/chosen": -4310.7607421875, + "eval_logps/rejected": -4310.7607421875, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.1729431152344, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.1729431152344, + "eval_runtime": 4.2869, + "eval_samples_per_second": 2.333, + "eval_steps_per_second": 2.333, + "step": 1550 + }, + { + "epoch": 16.326315789473686, + "grad_norm": 1.1810367368525476e-06, + "learning_rate": 0.00016751578947368423, + "logits/chosen": 13.15052318572998, + "logits/rejected": 13.15052318572998, + "logps/chosen": -3544.3505859375, + "logps/rejected": -3544.3505859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4091796875, + "rewards/margins": 0.0, + "rewards/rejected": -351.4091796875, + "step": 1551 + }, + { + "epoch": 16.33684210526316, + "grad_norm": 1.806598902476253e-06, + "learning_rate": 0.00016749473684210528, + "logits/chosen": 13.210683822631836, + "logits/rejected": 13.210683822631836, + "logps/chosen": -5173.298828125, + "logps/rejected": -5173.298828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.3914794921875, + "rewards/margins": 0.0, + "rewards/rejected": -514.3914794921875, + "step": 1552 + }, + { + "epoch": 16.347368421052632, + "grad_norm": 1.7224299426743528e-06, + "learning_rate": 0.0001674736842105263, + "logits/chosen": 13.218097686767578, + "logits/rejected": 13.218097686767578, + "logps/chosen": -5174.03369140625, + "logps/rejected": -5174.03369140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.4649658203125, + "rewards/margins": 0.0, + "rewards/rejected": -514.4649658203125, + "step": 1553 + }, + { + "epoch": 16.357894736842105, + "grad_norm": 1.440164396626642e-06, + "learning_rate": 0.00016745263157894738, + "logits/chosen": 13.159080505371094, + "logits/rejected": 13.159080505371094, + "logps/chosen": -2968.6787109375, + "logps/rejected": -2968.6787109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1416931152344, + "rewards/margins": 0.0, + "rewards/rejected": -294.1416931152344, + "step": 1554 + }, + { + "epoch": 16.36842105263158, + "grad_norm": 1.0518540420889622e-06, + "learning_rate": 0.00016743157894736843, + "logits/chosen": 13.186163902282715, + "logits/rejected": 13.186163902282715, + "logps/chosen": -3544.818359375, + "logps/rejected": -3544.818359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4559631347656, + "rewards/margins": 0.0, + "rewards/rejected": -351.4559631347656, + "step": 1555 + }, + { + "epoch": 16.378947368421052, + "grad_norm": 8.651535949866229e-07, + "learning_rate": 0.00016741052631578948, + "logits/chosen": 13.201345443725586, + "logits/rejected": 13.201345443725586, + "logps/chosen": -3545.4052734375, + "logps/rejected": -3545.4052734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5146484375, + "rewards/margins": 0.0, + "rewards/rejected": -351.5146484375, + "step": 1556 + }, + { + "epoch": 16.389473684210525, + "grad_norm": 9.128787041845499e-07, + "learning_rate": 0.00016738947368421053, + "logits/chosen": 13.204204559326172, + "logits/rejected": 13.204204559326172, + "logps/chosen": -2969.244140625, + "logps/rejected": -2969.244140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1982116699219, + "rewards/margins": 0.0, + "rewards/rejected": -294.1982116699219, + "step": 1557 + }, + { + "epoch": 16.4, + "grad_norm": 2.8977399324503494e-06, + "learning_rate": 0.0001673684210526316, + "logits/chosen": 13.275843620300293, + "logits/rejected": 13.275843620300293, + "logps/chosen": -4878.45263671875, + "logps/rejected": -4878.45263671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.99810791015625, + "rewards/margins": 0.0, + "rewards/rejected": -484.99810791015625, + "step": 1558 + }, + { + "epoch": 16.410526315789475, + "grad_norm": 2.60602064372506e-06, + "learning_rate": 0.00016734736842105263, + "logits/chosen": 13.284257888793945, + "logits/rejected": 13.284257888793945, + "logps/chosen": -4878.1826171875, + "logps/rejected": -4878.1826171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -484.9710998535156, + "rewards/margins": 0.0, + "rewards/rejected": -484.9710998535156, + "step": 1559 + }, + { + "epoch": 16.42105263157895, + "grad_norm": 2.481759565853281e-06, + "learning_rate": 0.00016732631578947368, + "logits/chosen": 13.297558784484863, + "logits/rejected": 13.297558784484863, + "logps/chosen": -5177.32373046875, + "logps/rejected": -5177.32373046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7939453125, + "rewards/margins": 0.0, + "rewards/rejected": -514.7939453125, + "step": 1560 + }, + { + "epoch": 16.431578947368422, + "grad_norm": 1.0028124961536378e-06, + "learning_rate": 0.00016730526315789475, + "logits/chosen": 13.22165584564209, + "logits/rejected": 13.22165584564209, + "logps/chosen": -2673.4052734375, + "logps/rejected": -2673.4052734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5589294433594, + "rewards/margins": 0.0, + "rewards/rejected": -264.5589294433594, + "step": 1561 + }, + { + "epoch": 16.442105263157895, + "grad_norm": 1.7059057881851913e-06, + "learning_rate": 0.0001672842105263158, + "logits/chosen": 13.232525825500488, + "logits/rejected": 13.232525825500488, + "logps/chosen": -3996.466796875, + "logps/rejected": -3996.466796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8193664550781, + "rewards/margins": 0.0, + "rewards/rejected": -396.8193664550781, + "step": 1562 + }, + { + "epoch": 16.45263157894737, + "grad_norm": 2.424517560939421e-06, + "learning_rate": 0.00016726315789473685, + "logits/chosen": 13.22361946105957, + "logits/rejected": 13.22361946105957, + "logps/chosen": -2967.9365234375, + "logps/rejected": -2967.9365234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0674743652344, + "rewards/margins": 0.0, + "rewards/rejected": -294.0674743652344, + "step": 1563 + }, + { + "epoch": 16.46315789473684, + "grad_norm": 2.439674972265493e-06, + "learning_rate": 0.0001672421052631579, + "logits/chosen": 13.241548538208008, + "logits/rejected": 13.241548538208008, + "logps/chosen": -3545.689453125, + "logps/rejected": -3545.689453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5430603027344, + "rewards/margins": 0.0, + "rewards/rejected": -351.5430603027344, + "step": 1564 + }, + { + "epoch": 16.473684210526315, + "grad_norm": 1.7806263485908858e-06, + "learning_rate": 0.00016722105263157895, + "logits/chosen": 13.2506685256958, + "logits/rejected": 13.2506685256958, + "logps/chosen": -3777.9892578125, + "logps/rejected": -3777.9892578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9298095703125, + "rewards/margins": 0.0, + "rewards/rejected": -374.9298095703125, + "step": 1565 + }, + { + "epoch": 16.48421052631579, + "grad_norm": 1.2744127388941706e-06, + "learning_rate": 0.0001672, + "logits/chosen": 13.240317344665527, + "logits/rejected": 13.240317344665527, + "logps/chosen": -2969.0400390625, + "logps/rejected": -2969.0400390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1778259277344, + "rewards/margins": 0.0, + "rewards/rejected": -294.1778259277344, + "step": 1566 + }, + { + "epoch": 16.49473684210526, + "grad_norm": 2.0518414203252178e-06, + "learning_rate": 0.00016717894736842105, + "logits/chosen": 13.24996280670166, + "logits/rejected": 13.24996280670166, + "logps/chosen": -3996.28125, + "logps/rejected": -3996.28125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8008117675781, + "rewards/margins": 0.0, + "rewards/rejected": -396.8008117675781, + "step": 1567 + }, + { + "epoch": 16.50526315789474, + "grad_norm": 1.7397899227944436e-06, + "learning_rate": 0.00016715789473684212, + "logits/chosen": 13.262529373168945, + "logits/rejected": 13.262529373168945, + "logps/chosen": -3777.9306640625, + "logps/rejected": -3777.9306640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9239501953125, + "rewards/margins": 0.0, + "rewards/rejected": -374.9239501953125, + "step": 1568 + }, + { + "epoch": 16.51578947368421, + "grad_norm": 3.3266060199821368e-06, + "learning_rate": 0.00016713684210526317, + "logits/chosen": 13.296720504760742, + "logits/rejected": 13.296720504760742, + "logps/chosen": -4878.798828125, + "logps/rejected": -4878.798828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.03271484375, + "rewards/margins": 0.0, + "rewards/rejected": -485.03271484375, + "step": 1569 + }, + { + "epoch": 16.526315789473685, + "grad_norm": 1.233409875567304e-06, + "learning_rate": 0.00016711578947368422, + "logits/chosen": 13.269818305969238, + "logits/rejected": 13.269818305969238, + "logps/chosen": -4328.2216796875, + "logps/rejected": -4328.2216796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.6372985839844, + "rewards/margins": 0.0, + "rewards/rejected": -429.6372985839844, + "step": 1570 + }, + { + "epoch": 16.53684210526316, + "grad_norm": 1.755433572725451e-06, + "learning_rate": 0.00016709473684210527, + "logits/chosen": 13.225573539733887, + "logits/rejected": 13.225573539733887, + "logps/chosen": -3996.615234375, + "logps/rejected": -3996.615234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8341979980469, + "rewards/margins": 0.0, + "rewards/rejected": -396.8341979980469, + "step": 1571 + }, + { + "epoch": 16.54736842105263, + "grad_norm": 9.790144304133719e-07, + "learning_rate": 0.00016707368421052632, + "logits/chosen": 13.223475456237793, + "logits/rejected": 13.223475456237793, + "logps/chosen": -3545.7998046875, + "logps/rejected": -3545.7998046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5541076660156, + "rewards/margins": 0.0, + "rewards/rejected": -351.5541076660156, + "step": 1572 + }, + { + "epoch": 16.557894736842105, + "grad_norm": 1.1840425031550694e-06, + "learning_rate": 0.00016705263157894737, + "logits/chosen": 13.198973655700684, + "logits/rejected": 13.198973655700684, + "logps/chosen": -3997.5546875, + "logps/rejected": -3997.5546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.92816162109375, + "rewards/margins": 0.0, + "rewards/rejected": -396.92816162109375, + "step": 1573 + }, + { + "epoch": 16.568421052631578, + "grad_norm": 1.8935512571260915e-06, + "learning_rate": 0.00016703157894736842, + "logits/chosen": 13.195361137390137, + "logits/rejected": 13.195361137390137, + "logps/chosen": -3758.8916015625, + "logps/rejected": -3758.8916015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9708557128906, + "rewards/margins": 0.0, + "rewards/rejected": -372.9708557128906, + "step": 1574 + }, + { + "epoch": 16.57894736842105, + "grad_norm": 1.1107524642284261e-06, + "learning_rate": 0.0001670105263157895, + "logits/chosen": 13.163818359375, + "logits/rejected": 13.163818359375, + "logps/chosen": -2673.5859375, + "logps/rejected": -2673.5859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5769958496094, + "rewards/margins": 0.0, + "rewards/rejected": -264.5769958496094, + "step": 1575 + }, + { + "epoch": 16.589473684210525, + "grad_norm": 1.747143869579304e-06, + "learning_rate": 0.00016698947368421055, + "logits/chosen": 13.229965209960938, + "logits/rejected": 13.229965209960938, + "logps/chosen": -5176.796875, + "logps/rejected": -5176.796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7412719726562, + "rewards/margins": 0.0, + "rewards/rejected": -514.7412719726562, + "step": 1576 + }, + { + "epoch": 16.6, + "grad_norm": 1.7563052097102627e-06, + "learning_rate": 0.0001669684210526316, + "logits/chosen": 13.214452743530273, + "logits/rejected": 13.214452743530273, + "logps/chosen": -4879.61328125, + "logps/rejected": -4879.61328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1141662597656, + "rewards/margins": 0.0, + "rewards/rejected": -485.1141662597656, + "step": 1577 + }, + { + "epoch": 16.610526315789475, + "grad_norm": 1.0877309932766366e-06, + "learning_rate": 0.00016694736842105262, + "logits/chosen": 13.156359672546387, + "logits/rejected": 13.156359672546387, + "logps/chosen": -3998.810546875, + "logps/rejected": -3998.810546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0537414550781, + "rewards/margins": 0.0, + "rewards/rejected": -397.0537414550781, + "step": 1578 + }, + { + "epoch": 16.621052631578948, + "grad_norm": 1.4249728792492533e-06, + "learning_rate": 0.0001669263157894737, + "logits/chosen": 13.17270278930664, + "logits/rejected": 13.17270278930664, + "logps/chosen": -4287.84228515625, + "logps/rejected": -4287.84228515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.98687744140625, + "rewards/margins": 0.0, + "rewards/rejected": -425.98687744140625, + "step": 1579 + }, + { + "epoch": 16.63157894736842, + "grad_norm": 1.1871371725646895e-06, + "learning_rate": 0.00016690526315789474, + "logits/chosen": 13.196175575256348, + "logits/rejected": 13.196175575256348, + "logps/chosen": -4880.28125, + "logps/rejected": -4880.28125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.18096923828125, + "rewards/margins": 0.0, + "rewards/rejected": -485.18096923828125, + "step": 1580 + }, + { + "epoch": 16.642105263157895, + "grad_norm": 1.0823998763953568e-06, + "learning_rate": 0.0001668842105263158, + "logits/chosen": 13.146787643432617, + "logits/rejected": 13.146787643432617, + "logps/chosen": -3759.208984375, + "logps/rejected": -3759.208984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -373.0025939941406, + "rewards/margins": 0.0, + "rewards/rejected": -373.0025939941406, + "step": 1581 + }, + { + "epoch": 16.652631578947368, + "grad_norm": 1.1689540997394943e-06, + "learning_rate": 0.00016686315789473687, + "logits/chosen": 13.184684753417969, + "logits/rejected": 13.184684753417969, + "logps/chosen": -4880.6171875, + "logps/rejected": -4880.6171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2145690917969, + "rewards/margins": 0.0, + "rewards/rejected": -485.2145690917969, + "step": 1582 + }, + { + "epoch": 16.66315789473684, + "grad_norm": 1.5519713087996934e-06, + "learning_rate": 0.00016684210526315792, + "logits/chosen": 13.149711608886719, + "logits/rejected": 13.149711608886719, + "logps/chosen": -4288.66259765625, + "logps/rejected": -4288.66259765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.06890869140625, + "rewards/margins": 0.0, + "rewards/rejected": -426.06890869140625, + "step": 1583 + }, + { + "epoch": 16.673684210526314, + "grad_norm": 1.1183192327735014e-06, + "learning_rate": 0.00016682105263157894, + "logits/chosen": 13.122364044189453, + "logits/rejected": 13.122364044189453, + "logps/chosen": -2968.58203125, + "logps/rejected": -2968.58203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.13201904296875, + "rewards/margins": 0.0, + "rewards/rejected": -294.13201904296875, + "step": 1584 + }, + { + "epoch": 16.68421052631579, + "grad_norm": 1.2538896498881513e-06, + "learning_rate": 0.0001668, + "logits/chosen": 13.12143611907959, + "logits/rejected": 13.12143611907959, + "logps/chosen": -3999.919921875, + "logps/rejected": -3999.919921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1646728515625, + "rewards/margins": 0.0, + "rewards/rejected": -397.1646728515625, + "step": 1585 + }, + { + "epoch": 16.694736842105264, + "grad_norm": 2.0074051008123206e-06, + "learning_rate": 0.00016677894736842107, + "logits/chosen": 13.181496620178223, + "logits/rejected": 13.181496620178223, + "logps/chosen": -5175.79296875, + "logps/rejected": -5175.79296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.640869140625, + "rewards/margins": 0.0, + "rewards/rejected": -514.640869140625, + "step": 1586 + }, + { + "epoch": 16.705263157894738, + "grad_norm": 8.466049621347338e-07, + "learning_rate": 0.00016675789473684211, + "logits/chosen": 13.109195709228516, + "logits/rejected": 13.109195709228516, + "logps/chosen": -2673.37109375, + "logps/rejected": -2673.37109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5555114746094, + "rewards/margins": 0.0, + "rewards/rejected": -264.5555114746094, + "step": 1587 + }, + { + "epoch": 16.71578947368421, + "grad_norm": 1.3248028380985488e-06, + "learning_rate": 0.00016673684210526316, + "logits/chosen": 13.134121894836426, + "logits/rejected": 13.134121894836426, + "logps/chosen": -3778.740234375, + "logps/rejected": -3778.740234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.0049133300781, + "rewards/margins": 0.0, + "rewards/rejected": -375.0049133300781, + "step": 1588 + }, + { + "epoch": 16.726315789473684, + "grad_norm": 1.9373460418137256e-06, + "learning_rate": 0.0001667157894736842, + "logits/chosen": 13.154068946838379, + "logits/rejected": 13.154068946838379, + "logps/chosen": -4326.181640625, + "logps/rejected": -4326.181640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4333190917969, + "rewards/margins": 0.0, + "rewards/rejected": -429.4333190917969, + "step": 1589 + }, + { + "epoch": 16.736842105263158, + "grad_norm": 1.785628114703286e-06, + "learning_rate": 0.0001666947368421053, + "logits/chosen": 13.187432289123535, + "logits/rejected": 13.187432289123535, + "logps/chosen": -5175.6162109375, + "logps/rejected": -5175.6162109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6232299804688, + "rewards/margins": 0.0, + "rewards/rejected": -514.6232299804688, + "step": 1590 + }, + { + "epoch": 16.74736842105263, + "grad_norm": 9.664781828178093e-07, + "learning_rate": 0.0001666736842105263, + "logits/chosen": 13.145255088806152, + "logits/rejected": 13.145255088806152, + "logps/chosen": -3542.837890625, + "logps/rejected": -3542.837890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2579040527344, + "rewards/margins": 0.0, + "rewards/rejected": -351.2579040527344, + "step": 1591 + }, + { + "epoch": 16.757894736842104, + "grad_norm": 8.376316600333666e-07, + "learning_rate": 0.00016665263157894736, + "logits/chosen": 13.132930755615234, + "logits/rejected": 13.132930755615234, + "logps/chosen": -2673.841796875, + "logps/rejected": -2673.841796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.60260009765625, + "rewards/margins": 0.0, + "rewards/rejected": -264.60260009765625, + "step": 1592 + }, + { + "epoch": 16.768421052631577, + "grad_norm": 2.3407110347761773e-06, + "learning_rate": 0.00016663157894736844, + "logits/chosen": 13.215516090393066, + "logits/rejected": 13.215516090393066, + "logps/chosen": -5175.9033203125, + "logps/rejected": -5175.9033203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6519165039062, + "rewards/margins": 0.0, + "rewards/rejected": -514.6519165039062, + "step": 1593 + }, + { + "epoch": 16.778947368421054, + "grad_norm": 9.087800094675913e-07, + "learning_rate": 0.00016661052631578949, + "logits/chosen": 13.177078247070312, + "logits/rejected": 13.177078247070312, + "logps/chosen": -3543.1328125, + "logps/rejected": -3543.1328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.28741455078125, + "rewards/margins": 0.0, + "rewards/rejected": -351.28741455078125, + "step": 1594 + }, + { + "epoch": 16.789473684210527, + "grad_norm": 1.2974768424101057e-06, + "learning_rate": 0.00016658947368421054, + "logits/chosen": 13.229546546936035, + "logits/rejected": 13.229546546936035, + "logps/chosen": -4881.5537109375, + "logps/rejected": -4881.5537109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.3081970214844, + "rewards/margins": 0.0, + "rewards/rejected": -485.3081970214844, + "step": 1595 + }, + { + "epoch": 16.8, + "grad_norm": 8.345022592948226e-07, + "learning_rate": 0.00016656842105263158, + "logits/chosen": 13.180034637451172, + "logits/rejected": 13.180034637451172, + "logps/chosen": -2673.927734375, + "logps/rejected": -2673.927734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6111755371094, + "rewards/margins": 0.0, + "rewards/rejected": -264.6111755371094, + "step": 1596 + }, + { + "epoch": 16.810526315789474, + "grad_norm": 1.6810772649478167e-06, + "learning_rate": 0.00016654736842105263, + "logits/chosen": 13.26390552520752, + "logits/rejected": 13.26390552520752, + "logps/chosen": -5177.091796875, + "logps/rejected": -5177.091796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.770751953125, + "rewards/margins": 0.0, + "rewards/rejected": -514.770751953125, + "step": 1597 + }, + { + "epoch": 16.821052631578947, + "grad_norm": 1.2773365369866951e-06, + "learning_rate": 0.00016652631578947368, + "logits/chosen": 13.213223457336426, + "logits/rejected": 13.213223457336426, + "logps/chosen": -3998.63671875, + "logps/rejected": -3998.63671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0363464355469, + "rewards/margins": 0.0, + "rewards/rejected": -397.0363464355469, + "step": 1598 + }, + { + "epoch": 16.83157894736842, + "grad_norm": 8.511328815075103e-07, + "learning_rate": 0.00016650526315789473, + "logits/chosen": 13.210891723632812, + "logits/rejected": 13.210891723632812, + "logps/chosen": -2674.048828125, + "logps/rejected": -2674.048828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.623291015625, + "rewards/margins": 0.0, + "rewards/rejected": -264.623291015625, + "step": 1599 + }, + { + "epoch": 16.842105263157894, + "grad_norm": 1.1729697462214972e-06, + "learning_rate": 0.0001664842105263158, + "logits/chosen": 13.278419494628906, + "logits/rejected": 13.278419494628906, + "logps/chosen": -4881.03466796875, + "logps/rejected": -4881.03466796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2563171386719, + "rewards/margins": 0.0, + "rewards/rejected": -485.2563171386719, + "step": 1600 + }, + { + "epoch": 16.842105263157894, + "eval_logits/chosen": 13.265612602233887, + "eval_logits/rejected": 13.265612602233887, + "eval_logps/chosen": -4311.87646484375, + "eval_logps/rejected": -4311.87646484375, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.28448486328125, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.28448486328125, + "eval_runtime": 4.2993, + "eval_samples_per_second": 2.326, + "eval_steps_per_second": 2.326, + "step": 1600 + }, + { + "epoch": 16.852631578947367, + "grad_norm": 1.272752911063435e-06, + "learning_rate": 0.00016646315789473686, + "logits/chosen": 13.282668113708496, + "logits/rejected": 13.282668113708496, + "logps/chosen": -4881.19921875, + "logps/rejected": -4881.19921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.27276611328125, + "rewards/margins": 0.0, + "rewards/rejected": -485.27276611328125, + "step": 1601 + }, + { + "epoch": 16.863157894736844, + "grad_norm": 1.1911339470316307e-06, + "learning_rate": 0.0001664421052631579, + "logits/chosen": 13.232366561889648, + "logits/rejected": 13.232366561889648, + "logps/chosen": -3998.216796875, + "logps/rejected": -3998.216796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9943542480469, + "rewards/margins": 0.0, + "rewards/rejected": -396.9943542480469, + "step": 1602 + }, + { + "epoch": 16.873684210526317, + "grad_norm": 1.1970423656748608e-06, + "learning_rate": 0.00016642105263157896, + "logits/chosen": 13.231376647949219, + "logits/rejected": 13.231376647949219, + "logps/chosen": -3998.2421875, + "logps/rejected": -3998.2421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9969177246094, + "rewards/margins": 0.0, + "rewards/rejected": -396.9969177246094, + "step": 1603 + }, + { + "epoch": 16.88421052631579, + "grad_norm": 1.3990925253892783e-06, + "learning_rate": 0.0001664, + "logits/chosen": 13.240729331970215, + "logits/rejected": 13.240729331970215, + "logps/chosen": -3543.373046875, + "logps/rejected": -3543.373046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3114318847656, + "rewards/margins": 0.0, + "rewards/rejected": -351.3114318847656, + "step": 1604 + }, + { + "epoch": 16.894736842105264, + "grad_norm": 1.425261757503904e-06, + "learning_rate": 0.00016637894736842106, + "logits/chosen": 13.248808860778809, + "logits/rejected": 13.248808860778809, + "logps/chosen": -4288.2763671875, + "logps/rejected": -4288.2763671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0302734375, + "rewards/margins": 0.0, + "rewards/rejected": -426.0302734375, + "step": 1605 + }, + { + "epoch": 16.905263157894737, + "grad_norm": 2.318427050340688e-06, + "learning_rate": 0.0001663578947368421, + "logits/chosen": 13.25756549835205, + "logits/rejected": 13.25756549835205, + "logps/chosen": -4326.1875, + "logps/rejected": -4326.1875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.43389892578125, + "rewards/margins": 0.0, + "rewards/rejected": -429.43389892578125, + "step": 1606 + }, + { + "epoch": 16.91578947368421, + "grad_norm": 8.237370252572873e-07, + "learning_rate": 0.00016633684210526318, + "logits/chosen": 13.212408065795898, + "logits/rejected": 13.212408065795898, + "logps/chosen": -2674.447265625, + "logps/rejected": -2674.447265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.66314697265625, + "rewards/margins": 0.0, + "rewards/rejected": -264.66314697265625, + "step": 1607 + }, + { + "epoch": 16.926315789473684, + "grad_norm": 1.3481775340551394e-06, + "learning_rate": 0.00016631578947368423, + "logits/chosen": 13.270916938781738, + "logits/rejected": 13.270916938781738, + "logps/chosen": -4881.7822265625, + "logps/rejected": -4881.7822265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.3310546875, + "rewards/margins": 0.0, + "rewards/rejected": -485.3310546875, + "step": 1608 + }, + { + "epoch": 16.936842105263157, + "grad_norm": 1.6446851986984257e-06, + "learning_rate": 0.00016629473684210528, + "logits/chosen": 13.268778800964355, + "logits/rejected": 13.268778800964355, + "logps/chosen": -4881.5537109375, + "logps/rejected": -4881.5537109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.3081970214844, + "rewards/margins": 0.0, + "rewards/rejected": -485.3081970214844, + "step": 1609 + }, + { + "epoch": 16.94736842105263, + "grad_norm": 8.176473329513101e-07, + "learning_rate": 0.0001662736842105263, + "logits/chosen": 13.206258773803711, + "logits/rejected": 13.206258773803711, + "logps/chosen": -2674.796875, + "logps/rejected": -2674.796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6980895996094, + "rewards/margins": 0.0, + "rewards/rejected": -264.6980895996094, + "step": 1610 + }, + { + "epoch": 16.957894736842107, + "grad_norm": 2.6329346383136e-06, + "learning_rate": 0.00016625263157894738, + "logits/chosen": 13.271363258361816, + "logits/rejected": 13.271363258361816, + "logps/chosen": -5177.3896484375, + "logps/rejected": -5177.3896484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.800537109375, + "rewards/margins": 0.0, + "rewards/rejected": -514.800537109375, + "step": 1611 + }, + { + "epoch": 16.96842105263158, + "grad_norm": 9.865083256954676e-07, + "learning_rate": 0.00016623157894736843, + "logits/chosen": 13.2173433303833, + "logits/rejected": 13.2173433303833, + "logps/chosen": -3758.169921875, + "logps/rejected": -3758.169921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.898681640625, + "rewards/margins": 0.0, + "rewards/rejected": -372.898681640625, + "step": 1612 + }, + { + "epoch": 16.978947368421053, + "grad_norm": 9.488172167948505e-07, + "learning_rate": 0.00016621052631578948, + "logits/chosen": 13.21692180633545, + "logits/rejected": 13.21692180633545, + "logps/chosen": -3543.259765625, + "logps/rejected": -3543.259765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.30010986328125, + "rewards/margins": 0.0, + "rewards/rejected": -351.30010986328125, + "step": 1613 + }, + { + "epoch": 16.989473684210527, + "grad_norm": 1.335289198323153e-06, + "learning_rate": 0.00016618947368421053, + "logits/chosen": 13.263039588928223, + "logits/rejected": 13.263039588928223, + "logps/chosen": -5177.6787109375, + "logps/rejected": -5177.6787109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.8294677734375, + "rewards/margins": 0.0, + "rewards/rejected": -514.8294677734375, + "step": 1614 + }, + { + "epoch": 17.0, + "grad_norm": 1.1219150337637984e-06, + "learning_rate": 0.0001661684210526316, + "logits/chosen": 13.25462532043457, + "logits/rejected": 13.25462532043457, + "logps/chosen": -4882.509765625, + "logps/rejected": -4882.509765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.40380859375, + "rewards/margins": 0.0, + "rewards/rejected": -485.40380859375, + "step": 1615 + }, + { + "epoch": 17.010526315789473, + "grad_norm": 1.3002828609387507e-06, + "learning_rate": 0.00016614736842105262, + "logits/chosen": 13.218510627746582, + "logits/rejected": 13.218510627746582, + "logps/chosen": -3543.177734375, + "logps/rejected": -3543.177734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2919006347656, + "rewards/margins": 0.0, + "rewards/rejected": -351.2919006347656, + "step": 1616 + }, + { + "epoch": 17.021052631578947, + "grad_norm": 1.2810995713152806e-06, + "learning_rate": 0.00016612631578947367, + "logits/chosen": 13.261174201965332, + "logits/rejected": 13.261174201965332, + "logps/chosen": -4882.78515625, + "logps/rejected": -4882.78515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.4313659667969, + "rewards/margins": 0.0, + "rewards/rejected": -485.4313659667969, + "step": 1617 + }, + { + "epoch": 17.03157894736842, + "grad_norm": 1.475021804253629e-06, + "learning_rate": 0.00016610526315789475, + "logits/chosen": 13.213229179382324, + "logits/rejected": 13.213229179382324, + "logps/chosen": -2966.763671875, + "logps/rejected": -2966.763671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9501647949219, + "rewards/margins": 0.0, + "rewards/rejected": -293.9501647949219, + "step": 1618 + }, + { + "epoch": 17.042105263157893, + "grad_norm": 9.967370715457946e-07, + "learning_rate": 0.0001660842105263158, + "logits/chosen": 13.27358341217041, + "logits/rejected": 13.27358341217041, + "logps/chosen": -4882.57568359375, + "logps/rejected": -4882.57568359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.410400390625, + "rewards/margins": 0.0, + "rewards/rejected": -485.410400390625, + "step": 1619 + }, + { + "epoch": 17.05263157894737, + "grad_norm": 1.3451509630613145e-06, + "learning_rate": 0.00016606315789473685, + "logits/chosen": 13.245838165283203, + "logits/rejected": 13.245838165283203, + "logps/chosen": -3776.876953125, + "logps/rejected": -3776.876953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8185729980469, + "rewards/margins": 0.0, + "rewards/rejected": -374.8185729980469, + "step": 1620 + }, + { + "epoch": 17.063157894736843, + "grad_norm": 8.665405744068266e-07, + "learning_rate": 0.0001660421052631579, + "logits/chosen": 13.248611450195312, + "logits/rejected": 13.248611450195312, + "logps/chosen": -3543.697265625, + "logps/rejected": -3543.697265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3438415527344, + "rewards/margins": 0.0, + "rewards/rejected": -351.3438415527344, + "step": 1621 + }, + { + "epoch": 17.073684210526316, + "grad_norm": 1.7614240732655162e-06, + "learning_rate": 0.00016602105263157897, + "logits/chosen": 13.240443229675293, + "logits/rejected": 13.240443229675293, + "logps/chosen": -3997.587890625, + "logps/rejected": -3997.587890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9314880371094, + "rewards/margins": 0.0, + "rewards/rejected": -396.9314880371094, + "step": 1622 + }, + { + "epoch": 17.08421052631579, + "grad_norm": 8.349593372258823e-07, + "learning_rate": 0.000166, + "logits/chosen": 13.254756927490234, + "logits/rejected": 13.254756927490234, + "logps/chosen": -3543.798828125, + "logps/rejected": -3543.798828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.35400390625, + "rewards/margins": 0.0, + "rewards/rejected": -351.35400390625, + "step": 1623 + }, + { + "epoch": 17.094736842105263, + "grad_norm": 9.612680287318653e-07, + "learning_rate": 0.00016597894736842105, + "logits/chosen": 13.25146770477295, + "logits/rejected": 13.25146770477295, + "logps/chosen": -3758.263671875, + "logps/rejected": -3758.263671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9080505371094, + "rewards/margins": 0.0, + "rewards/rejected": -372.9080505371094, + "step": 1624 + }, + { + "epoch": 17.105263157894736, + "grad_norm": 1.274058945455181e-06, + "learning_rate": 0.00016595789473684212, + "logits/chosen": 13.292013168334961, + "logits/rejected": 13.292013168334961, + "logps/chosen": -4883.044921875, + "logps/rejected": -4883.044921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.45733642578125, + "rewards/margins": 0.0, + "rewards/rejected": -485.45733642578125, + "step": 1625 + }, + { + "epoch": 17.11578947368421, + "grad_norm": 1.0444042572999024e-06, + "learning_rate": 0.00016593684210526317, + "logits/chosen": 13.24094295501709, + "logits/rejected": 13.24094295501709, + "logps/chosen": -2967.138671875, + "logps/rejected": -2967.138671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9876708984375, + "rewards/margins": 0.0, + "rewards/rejected": -293.9876708984375, + "step": 1626 + }, + { + "epoch": 17.126315789473683, + "grad_norm": 8.29914313271729e-07, + "learning_rate": 0.00016591578947368422, + "logits/chosen": 13.25707721710205, + "logits/rejected": 13.25707721710205, + "logps/chosen": -3544.16015625, + "logps/rejected": -3544.16015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.39013671875, + "rewards/margins": 0.0, + "rewards/rejected": -351.39013671875, + "step": 1627 + }, + { + "epoch": 17.13684210526316, + "grad_norm": 8.177157724276185e-07, + "learning_rate": 0.00016589473684210527, + "logits/chosen": 13.260442733764648, + "logits/rejected": 13.260442733764648, + "logps/chosen": -3544.287109375, + "logps/rejected": -3544.287109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.40283203125, + "rewards/margins": 0.0, + "rewards/rejected": -351.40283203125, + "step": 1628 + }, + { + "epoch": 17.147368421052633, + "grad_norm": 8.585454338572163e-07, + "learning_rate": 0.00016587368421052632, + "logits/chosen": 13.243003845214844, + "logits/rejected": 13.243003845214844, + "logps/chosen": -2673.169921875, + "logps/rejected": -2673.169921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.535400390625, + "rewards/margins": 0.0, + "rewards/rejected": -264.535400390625, + "step": 1629 + }, + { + "epoch": 17.157894736842106, + "grad_norm": 8.492467600262898e-07, + "learning_rate": 0.00016585263157894737, + "logits/chosen": 13.246129035949707, + "logits/rejected": 13.246129035949707, + "logps/chosen": -2673.228515625, + "logps/rejected": -2673.228515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.541259765625, + "rewards/margins": 0.0, + "rewards/rejected": -264.541259765625, + "step": 1630 + }, + { + "epoch": 17.16842105263158, + "grad_norm": 7.809761086718936e-07, + "learning_rate": 0.00016583157894736842, + "logits/chosen": 13.269388198852539, + "logits/rejected": 13.269388198852539, + "logps/chosen": -3544.669921875, + "logps/rejected": -3544.669921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4411315917969, + "rewards/margins": 0.0, + "rewards/rejected": -351.4411315917969, + "step": 1631 + }, + { + "epoch": 17.178947368421053, + "grad_norm": 7.648363862244878e-07, + "learning_rate": 0.0001658105263157895, + "logits/chosen": 13.272018432617188, + "logits/rejected": 13.272018432617188, + "logps/chosen": -3544.994140625, + "logps/rejected": -3544.994140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4735412597656, + "rewards/margins": 0.0, + "rewards/rejected": -351.4735412597656, + "step": 1632 + }, + { + "epoch": 17.189473684210526, + "grad_norm": 1.338182528343168e-06, + "learning_rate": 0.00016578947368421054, + "logits/chosen": 13.297073364257812, + "logits/rejected": 13.297073364257812, + "logps/chosen": -4325.95703125, + "logps/rejected": -4325.95703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4108581542969, + "rewards/margins": 0.0, + "rewards/rejected": -429.4108581542969, + "step": 1633 + }, + { + "epoch": 17.2, + "grad_norm": 1.7346213780911057e-06, + "learning_rate": 0.0001657684210526316, + "logits/chosen": 13.26230525970459, + "logits/rejected": 13.26230525970459, + "logps/chosen": -3997.34765625, + "logps/rejected": -3997.34765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9074401855469, + "rewards/margins": 0.0, + "rewards/rejected": -396.9074401855469, + "step": 1634 + }, + { + "epoch": 17.210526315789473, + "grad_norm": 1.5153933645706275e-06, + "learning_rate": 0.00016574736842105264, + "logits/chosen": 13.256904602050781, + "logits/rejected": 13.256904602050781, + "logps/chosen": -3997.357421875, + "logps/rejected": -3997.357421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9084167480469, + "rewards/margins": 0.0, + "rewards/rejected": -396.9084167480469, + "step": 1635 + }, + { + "epoch": 17.221052631578946, + "grad_norm": 8.730449962968123e-07, + "learning_rate": 0.0001657263157894737, + "logits/chosen": 13.260743141174316, + "logits/rejected": 13.260743141174316, + "logps/chosen": -3546.0361328125, + "logps/rejected": -3546.0361328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5777282714844, + "rewards/margins": 0.0, + "rewards/rejected": -351.5777282714844, + "step": 1636 + }, + { + "epoch": 17.231578947368423, + "grad_norm": 1.2017012522846926e-06, + "learning_rate": 0.00016570526315789474, + "logits/chosen": 13.289908409118652, + "logits/rejected": 13.289908409118652, + "logps/chosen": -4881.3134765625, + "logps/rejected": -4881.3134765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2841796875, + "rewards/margins": 0.0, + "rewards/rejected": -485.2841796875, + "step": 1637 + }, + { + "epoch": 17.242105263157896, + "grad_norm": 1.467969013901893e-06, + "learning_rate": 0.0001656842105263158, + "logits/chosen": 13.24655532836914, + "logits/rejected": 13.24655532836914, + "logps/chosen": -3777.841796875, + "logps/rejected": -3777.841796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9150695800781, + "rewards/margins": 0.0, + "rewards/rejected": -374.9150695800781, + "step": 1638 + }, + { + "epoch": 17.25263157894737, + "grad_norm": 1.6740277715143748e-06, + "learning_rate": 0.00016566315789473687, + "logits/chosen": 13.23279857635498, + "logits/rejected": 13.23279857635498, + "logps/chosen": -3758.751953125, + "logps/rejected": -3758.751953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9568786621094, + "rewards/margins": 0.0, + "rewards/rejected": -372.9568786621094, + "step": 1639 + }, + { + "epoch": 17.263157894736842, + "grad_norm": 1.254528115168796e-06, + "learning_rate": 0.00016564210526315792, + "logits/chosen": 13.232345581054688, + "logits/rejected": 13.232345581054688, + "logps/chosen": -3546.5546875, + "logps/rejected": -3546.5546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.6296081542969, + "rewards/margins": 0.0, + "rewards/rejected": -351.6296081542969, + "step": 1640 + }, + { + "epoch": 17.273684210526316, + "grad_norm": 8.505404025527241e-07, + "learning_rate": 0.00016562105263157896, + "logits/chosen": 13.207818031311035, + "logits/rejected": 13.207818031311035, + "logps/chosen": -2674.3125, + "logps/rejected": -2674.3125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.649658203125, + "rewards/margins": 0.0, + "rewards/rejected": -264.649658203125, + "step": 1641 + }, + { + "epoch": 17.28421052631579, + "grad_norm": 1.5906676935628639e-06, + "learning_rate": 0.0001656, + "logits/chosen": 13.275137901306152, + "logits/rejected": 13.275137901306152, + "logps/chosen": -5175.3525390625, + "logps/rejected": -5175.3525390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5968627929688, + "rewards/margins": 0.0, + "rewards/rejected": -514.5968627929688, + "step": 1642 + }, + { + "epoch": 17.294736842105262, + "grad_norm": 1.3361074024942354e-06, + "learning_rate": 0.00016557894736842106, + "logits/chosen": 13.26642894744873, + "logits/rejected": 13.26642894744873, + "logps/chosen": -4880.49853515625, + "logps/rejected": -4880.49853515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.20269775390625, + "rewards/margins": 0.0, + "rewards/rejected": -485.20269775390625, + "step": 1643 + }, + { + "epoch": 17.305263157894736, + "grad_norm": 1.2978564427612582e-06, + "learning_rate": 0.0001655578947368421, + "logits/chosen": 13.253207206726074, + "logits/rejected": 13.253207206726074, + "logps/chosen": -4327.404296875, + "logps/rejected": -4327.404296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.5555725097656, + "rewards/margins": 0.0, + "rewards/rejected": -429.5555725097656, + "step": 1644 + }, + { + "epoch": 17.31578947368421, + "grad_norm": 9.405152354702295e-07, + "learning_rate": 0.00016553684210526316, + "logits/chosen": 13.229449272155762, + "logits/rejected": 13.229449272155762, + "logps/chosen": -3759.2548828125, + "logps/rejected": -3759.2548828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -373.0071716308594, + "rewards/margins": 0.0, + "rewards/rejected": -373.0071716308594, + "step": 1645 + }, + { + "epoch": 17.326315789473686, + "grad_norm": 2.9235459351184545e-06, + "learning_rate": 0.0001655157894736842, + "logits/chosen": 13.281408309936523, + "logits/rejected": 13.281408309936523, + "logps/chosen": -5175.4765625, + "logps/rejected": -5175.4765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6092529296875, + "rewards/margins": 0.0, + "rewards/rejected": -514.6092529296875, + "step": 1646 + }, + { + "epoch": 17.33684210526316, + "grad_norm": 1.6358704897356802e-06, + "learning_rate": 0.0001654947368421053, + "logits/chosen": 13.272923469543457, + "logits/rejected": 13.272923469543457, + "logps/chosen": -4880.3291015625, + "logps/rejected": -4880.3291015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1857604980469, + "rewards/margins": 0.0, + "rewards/rejected": -485.1857604980469, + "step": 1647 + }, + { + "epoch": 17.347368421052632, + "grad_norm": 1.3586352451966377e-06, + "learning_rate": 0.0001654736842105263, + "logits/chosen": 13.220980644226074, + "logits/rejected": 13.220980644226074, + "logps/chosen": -3997.91015625, + "logps/rejected": -3997.91015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9637145996094, + "rewards/margins": 0.0, + "rewards/rejected": -396.9637145996094, + "step": 1648 + }, + { + "epoch": 17.357894736842105, + "grad_norm": 1.3687122191186063e-06, + "learning_rate": 0.00016545263157894736, + "logits/chosen": 13.217361450195312, + "logits/rejected": 13.217361450195312, + "logps/chosen": -2968.2177734375, + "logps/rejected": -2968.2177734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0955810546875, + "rewards/margins": 0.0, + "rewards/rejected": -294.0955810546875, + "step": 1649 + }, + { + "epoch": 17.36842105263158, + "grad_norm": 1.3353009080674383e-06, + "learning_rate": 0.00016543157894736843, + "logits/chosen": 13.234546661376953, + "logits/rejected": 13.234546661376953, + "logps/chosen": -3778.525390625, + "logps/rejected": -3778.525390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9834289550781, + "rewards/margins": 0.0, + "rewards/rejected": -374.9834289550781, + "step": 1650 + }, + { + "epoch": 17.36842105263158, + "eval_logits/chosen": 13.251673698425293, + "eval_logits/rejected": 13.251673698425293, + "eval_logps/chosen": -4311.59033203125, + "eval_logps/rejected": -4311.59033203125, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.25592041015625, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.25592041015625, + "eval_runtime": 4.2501, + "eval_samples_per_second": 2.353, + "eval_steps_per_second": 2.353, + "step": 1650 + }, + { + "epoch": 17.378947368421052, + "grad_norm": 1.0953107221212122e-06, + "learning_rate": 0.00016541052631578948, + "logits/chosen": 13.23139762878418, + "logits/rejected": 13.23139762878418, + "logps/chosen": -3546.4423828125, + "logps/rejected": -3546.4423828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.6183776855469, + "rewards/margins": 0.0, + "rewards/rejected": -351.6183776855469, + "step": 1651 + }, + { + "epoch": 17.389473684210525, + "grad_norm": 1.3938832807980361e-06, + "learning_rate": 0.00016538947368421053, + "logits/chosen": 13.218032836914062, + "logits/rejected": 13.218032836914062, + "logps/chosen": -2968.259765625, + "logps/rejected": -2968.259765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.09979248046875, + "rewards/margins": 0.0, + "rewards/rejected": -294.09979248046875, + "step": 1652 + }, + { + "epoch": 17.4, + "grad_norm": 1.5852797332627233e-06, + "learning_rate": 0.00016536842105263158, + "logits/chosen": 13.246777534484863, + "logits/rejected": 13.246777534484863, + "logps/chosen": -4287.09619140625, + "logps/rejected": -4287.09619140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9122619628906, + "rewards/margins": 0.0, + "rewards/rejected": -425.9122619628906, + "step": 1653 + }, + { + "epoch": 17.410526315789475, + "grad_norm": 1.7589213712199125e-06, + "learning_rate": 0.00016534736842105263, + "logits/chosen": 13.286881446838379, + "logits/rejected": 13.286881446838379, + "logps/chosen": -5176.388671875, + "logps/rejected": -5176.388671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.700439453125, + "rewards/margins": 0.0, + "rewards/rejected": -514.700439453125, + "step": 1654 + }, + { + "epoch": 17.42105263157895, + "grad_norm": 1.5455976836165064e-06, + "learning_rate": 0.00016532631578947368, + "logits/chosen": 13.230978965759277, + "logits/rejected": 13.230978965759277, + "logps/chosen": -3998.046875, + "logps/rejected": -3998.046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9773864746094, + "rewards/margins": 0.0, + "rewards/rejected": -396.9773864746094, + "step": 1655 + }, + { + "epoch": 17.431578947368422, + "grad_norm": 1.4634289300374803e-06, + "learning_rate": 0.00016530526315789473, + "logits/chosen": 13.22904109954834, + "logits/rejected": 13.22904109954834, + "logps/chosen": -3998.103515625, + "logps/rejected": -3998.103515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9830322265625, + "rewards/margins": 0.0, + "rewards/rejected": -396.9830322265625, + "step": 1656 + }, + { + "epoch": 17.442105263157895, + "grad_norm": 1.4717924159413087e-06, + "learning_rate": 0.0001652842105263158, + "logits/chosen": 13.281997680664062, + "logits/rejected": 13.281997680664062, + "logps/chosen": -5176.4345703125, + "logps/rejected": -5176.4345703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7050170898438, + "rewards/margins": 0.0, + "rewards/rejected": -514.7050170898438, + "step": 1657 + }, + { + "epoch": 17.45263157894737, + "grad_norm": 8.434948881586024e-07, + "learning_rate": 0.00016526315789473686, + "logits/chosen": 13.211052894592285, + "logits/rejected": 13.211052894592285, + "logps/chosen": -2673.8974609375, + "logps/rejected": -2673.8974609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.608154296875, + "rewards/margins": 0.0, + "rewards/rejected": -264.608154296875, + "step": 1658 + }, + { + "epoch": 17.46315789473684, + "grad_norm": 8.573423997404461e-07, + "learning_rate": 0.0001652421052631579, + "logits/chosen": 13.206940650939941, + "logits/rejected": 13.206940650939941, + "logps/chosen": -2673.7265625, + "logps/rejected": -2673.7265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.591064453125, + "rewards/margins": 0.0, + "rewards/rejected": -264.591064453125, + "step": 1659 + }, + { + "epoch": 17.473684210526315, + "grad_norm": 1.327583049715031e-06, + "learning_rate": 0.00016522105263157895, + "logits/chosen": 13.2329740524292, + "logits/rejected": 13.2329740524292, + "logps/chosen": -4288.6162109375, + "logps/rejected": -4288.6162109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.06427001953125, + "rewards/margins": 0.0, + "rewards/rejected": -426.06427001953125, + "step": 1660 + }, + { + "epoch": 17.48421052631579, + "grad_norm": 1.3415901776170358e-06, + "learning_rate": 0.0001652, + "logits/chosen": 13.227052688598633, + "logits/rejected": 13.227052688598633, + "logps/chosen": -4288.80615234375, + "logps/rejected": -4288.80615234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.083251953125, + "rewards/margins": 0.0, + "rewards/rejected": -426.083251953125, + "step": 1661 + }, + { + "epoch": 17.49473684210526, + "grad_norm": 1.8072954617309733e-06, + "learning_rate": 0.00016517894736842105, + "logits/chosen": 13.255829811096191, + "logits/rejected": 13.255829811096191, + "logps/chosen": -5177.033203125, + "logps/rejected": -5177.033203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.764892578125, + "rewards/margins": 0.0, + "rewards/rejected": -514.764892578125, + "step": 1662 + }, + { + "epoch": 17.50526315789474, + "grad_norm": 1.4737507854079013e-06, + "learning_rate": 0.0001651578947368421, + "logits/chosen": 13.206585884094238, + "logits/rejected": 13.206585884094238, + "logps/chosen": -3544.9296875, + "logps/rejected": -3544.9296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.46710205078125, + "rewards/margins": 0.0, + "rewards/rejected": -351.46710205078125, + "step": 1663 + }, + { + "epoch": 17.51578947368421, + "grad_norm": 1.1374694395271945e-06, + "learning_rate": 0.00016513684210526318, + "logits/chosen": 13.189765930175781, + "logits/rejected": 13.189765930175781, + "logps/chosen": -3998.576171875, + "logps/rejected": -3998.576171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0303039550781, + "rewards/margins": 0.0, + "rewards/rejected": -397.0303039550781, + "step": 1664 + }, + { + "epoch": 17.526315789473685, + "grad_norm": 1.2963898825546494e-06, + "learning_rate": 0.00016511578947368423, + "logits/chosen": 13.206446647644043, + "logits/rejected": 13.206446647644043, + "logps/chosen": -3778.44140625, + "logps/rejected": -3778.44140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.97503662109375, + "rewards/margins": 0.0, + "rewards/rejected": -374.97503662109375, + "step": 1665 + }, + { + "epoch": 17.53684210526316, + "grad_norm": 1.1174919336554012e-06, + "learning_rate": 0.00016509473684210528, + "logits/chosen": 13.18382740020752, + "logits/rejected": 13.18382740020752, + "logps/chosen": -3998.845703125, + "logps/rejected": -3998.845703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0572509765625, + "rewards/margins": 0.0, + "rewards/rejected": -397.0572509765625, + "step": 1666 + }, + { + "epoch": 17.54736842105263, + "grad_norm": 1.3727382111028419e-06, + "learning_rate": 0.00016507368421052633, + "logits/chosen": 13.23823070526123, + "logits/rejected": 13.23823070526123, + "logps/chosen": -5177.3037109375, + "logps/rejected": -5177.3037109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7919311523438, + "rewards/margins": 0.0, + "rewards/rejected": -514.7919311523438, + "step": 1667 + }, + { + "epoch": 17.557894736842105, + "grad_norm": 1.0609696801111568e-06, + "learning_rate": 0.00016505263157894738, + "logits/chosen": 13.176491737365723, + "logits/rejected": 13.176491737365723, + "logps/chosen": -2968.94140625, + "logps/rejected": -2968.94140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1679382324219, + "rewards/margins": 0.0, + "rewards/rejected": -294.1679382324219, + "step": 1668 + }, + { + "epoch": 17.568421052631578, + "grad_norm": 1.5158143469307106e-06, + "learning_rate": 0.00016503157894736843, + "logits/chosen": 13.192623138427734, + "logits/rejected": 13.192623138427734, + "logps/chosen": -3778.525390625, + "logps/rejected": -3778.525390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.9834289550781, + "rewards/margins": 0.0, + "rewards/rejected": -374.9834289550781, + "step": 1669 + }, + { + "epoch": 17.57894736842105, + "grad_norm": 1.6550102373003028e-06, + "learning_rate": 0.00016501052631578947, + "logits/chosen": 13.172411918640137, + "logits/rejected": 13.172411918640137, + "logps/chosen": -3999.150390625, + "logps/rejected": -3999.150390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0877380371094, + "rewards/margins": 0.0, + "rewards/rejected": -397.0877380371094, + "step": 1670 + }, + { + "epoch": 17.589473684210525, + "grad_norm": 1.703960379018099e-06, + "learning_rate": 0.00016498947368421055, + "logits/chosen": 13.216747283935547, + "logits/rejected": 13.216747283935547, + "logps/chosen": -4879.1630859375, + "logps/rejected": -4879.1630859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.06915283203125, + "rewards/margins": 0.0, + "rewards/rejected": -485.06915283203125, + "step": 1671 + }, + { + "epoch": 17.6, + "grad_norm": 1.2773125490639359e-06, + "learning_rate": 0.0001649684210526316, + "logits/chosen": 13.197542190551758, + "logits/rejected": 13.197542190551758, + "logps/chosen": -4327.16796875, + "logps/rejected": -4327.16796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.5319519042969, + "rewards/margins": 0.0, + "rewards/rejected": -429.5319519042969, + "step": 1672 + }, + { + "epoch": 17.610526315789475, + "grad_norm": 8.086529987849644e-07, + "learning_rate": 0.00016494736842105265, + "logits/chosen": 13.149407386779785, + "logits/rejected": 13.149407386779785, + "logps/chosen": -2674.2578125, + "logps/rejected": -2674.2578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6441955566406, + "rewards/margins": 0.0, + "rewards/rejected": -264.6441955566406, + "step": 1673 + }, + { + "epoch": 17.621052631578948, + "grad_norm": 1.176727778329223e-06, + "learning_rate": 0.00016492631578947367, + "logits/chosen": 13.150078773498535, + "logits/rejected": 13.150078773498535, + "logps/chosen": -3999.626953125, + "logps/rejected": -3999.626953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1353759765625, + "rewards/margins": 0.0, + "rewards/rejected": -397.1353759765625, + "step": 1674 + }, + { + "epoch": 17.63157894736842, + "grad_norm": 9.85017550192424e-07, + "learning_rate": 0.00016490526315789475, + "logits/chosen": 13.16022777557373, + "logits/rejected": 13.16022777557373, + "logps/chosen": -3543.904296875, + "logps/rejected": -3543.904296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.36456298828125, + "rewards/margins": 0.0, + "rewards/rejected": -351.36456298828125, + "step": 1675 + }, + { + "epoch": 17.642105263157895, + "grad_norm": 8.099822252916056e-07, + "learning_rate": 0.0001648842105263158, + "logits/chosen": 13.134472846984863, + "logits/rejected": 13.134472846984863, + "logps/chosen": -2674.35546875, + "logps/rejected": -2674.35546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6539611816406, + "rewards/margins": 0.0, + "rewards/rejected": -264.6539611816406, + "step": 1676 + }, + { + "epoch": 17.652631578947368, + "grad_norm": 1.276584384868329e-06, + "learning_rate": 0.00016486315789473685, + "logits/chosen": 13.14843463897705, + "logits/rejected": 13.14843463897705, + "logps/chosen": -3758.064453125, + "logps/rejected": -3758.064453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8881530761719, + "rewards/margins": 0.0, + "rewards/rejected": -372.8881530761719, + "step": 1677 + }, + { + "epoch": 17.66315789473684, + "grad_norm": 8.137351983350527e-07, + "learning_rate": 0.0001648421052631579, + "logits/chosen": 13.129046440124512, + "logits/rejected": 13.129046440124512, + "logps/chosen": -2674.59375, + "logps/rejected": -2674.59375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.67779541015625, + "rewards/margins": 0.0, + "rewards/rejected": -264.67779541015625, + "step": 1678 + }, + { + "epoch": 17.673684210526314, + "grad_norm": 1.6141307241923641e-06, + "learning_rate": 0.00016482105263157897, + "logits/chosen": 13.169686317443848, + "logits/rejected": 13.169686317443848, + "logps/chosen": -4327.4873046875, + "logps/rejected": -4327.4873046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.5638732910156, + "rewards/margins": 0.0, + "rewards/rejected": -429.5638732910156, + "step": 1679 + }, + { + "epoch": 17.68421052631579, + "grad_norm": 9.15808357149217e-07, + "learning_rate": 0.0001648, + "logits/chosen": 13.149499893188477, + "logits/rejected": 13.149499893188477, + "logps/chosen": -3543.8193359375, + "logps/rejected": -3543.8193359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3560485839844, + "rewards/margins": 0.0, + "rewards/rejected": -351.3560485839844, + "step": 1680 + }, + { + "epoch": 17.694736842105264, + "grad_norm": 1.5173853853411856e-06, + "learning_rate": 0.00016477894736842104, + "logits/chosen": 13.18626594543457, + "logits/rejected": 13.18626594543457, + "logps/chosen": -4879.27490234375, + "logps/rejected": -4879.27490234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.080322265625, + "rewards/margins": 0.0, + "rewards/rejected": -485.080322265625, + "step": 1681 + }, + { + "epoch": 17.705263157894738, + "grad_norm": 1.6809143517093617e-06, + "learning_rate": 0.00016475789473684212, + "logits/chosen": 13.190747261047363, + "logits/rejected": 13.190747261047363, + "logps/chosen": -4879.158203125, + "logps/rejected": -4879.158203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.06866455078125, + "rewards/margins": 0.0, + "rewards/rejected": -485.06866455078125, + "step": 1682 + }, + { + "epoch": 17.71578947368421, + "grad_norm": 2.2709521090291673e-06, + "learning_rate": 0.00016473684210526317, + "logits/chosen": 13.205517768859863, + "logits/rejected": 13.205517768859863, + "logps/chosen": -5175.84228515625, + "logps/rejected": -5175.84228515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6458129882812, + "rewards/margins": 0.0, + "rewards/rejected": -514.6458129882812, + "step": 1683 + }, + { + "epoch": 17.726315789473684, + "grad_norm": 7.800434218552255e-07, + "learning_rate": 0.00016471578947368422, + "logits/chosen": 13.148836135864258, + "logits/rejected": 13.148836135864258, + "logps/chosen": -2675.6943359375, + "logps/rejected": -2675.6943359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.787841796875, + "rewards/margins": 0.0, + "rewards/rejected": -264.787841796875, + "step": 1684 + }, + { + "epoch": 17.736842105263158, + "grad_norm": 1.6296972944473964e-06, + "learning_rate": 0.00016469473684210527, + "logits/chosen": 13.183969497680664, + "logits/rejected": 13.183969497680664, + "logps/chosen": -4289.72900390625, + "logps/rejected": -4289.72900390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.175537109375, + "rewards/margins": 0.0, + "rewards/rejected": -426.175537109375, + "step": 1685 + }, + { + "epoch": 17.74736842105263, + "grad_norm": 1.197213350678794e-06, + "learning_rate": 0.00016467368421052632, + "logits/chosen": 13.215919494628906, + "logits/rejected": 13.215919494628906, + "logps/chosen": -4879.90234375, + "logps/rejected": -4879.90234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.14306640625, + "rewards/margins": 0.0, + "rewards/rejected": -485.14306640625, + "step": 1686 + }, + { + "epoch": 17.757894736842104, + "grad_norm": 1.3737014796788571e-06, + "learning_rate": 0.00016465263157894737, + "logits/chosen": 13.171225547790527, + "logits/rejected": 13.171225547790527, + "logps/chosen": -2968.603515625, + "logps/rejected": -2968.603515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1341552734375, + "rewards/margins": 0.0, + "rewards/rejected": -294.1341552734375, + "step": 1687 + }, + { + "epoch": 17.768421052631577, + "grad_norm": 8.144222078954044e-07, + "learning_rate": 0.00016463157894736842, + "logits/chosen": 13.171070098876953, + "logits/rejected": 13.171070098876953, + "logps/chosen": -2675.91015625, + "logps/rejected": -2675.91015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.8094177246094, + "rewards/margins": 0.0, + "rewards/rejected": -264.8094177246094, + "step": 1688 + }, + { + "epoch": 17.778947368421054, + "grad_norm": 1.3077759604129824e-06, + "learning_rate": 0.0001646105263157895, + "logits/chosen": 13.200596809387207, + "logits/rejected": 13.200596809387207, + "logps/chosen": -3779.298828125, + "logps/rejected": -3779.298828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.0607604980469, + "rewards/margins": 0.0, + "rewards/rejected": -375.0607604980469, + "step": 1689 + }, + { + "epoch": 17.789473684210527, + "grad_norm": 1.3314678426468163e-06, + "learning_rate": 0.00016458947368421054, + "logits/chosen": 13.23664665222168, + "logits/rejected": 13.23664665222168, + "logps/chosen": -4880.37109375, + "logps/rejected": -4880.37109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.18994140625, + "rewards/margins": 0.0, + "rewards/rejected": -485.18994140625, + "step": 1690 + }, + { + "epoch": 17.8, + "grad_norm": 1.4908573575667106e-06, + "learning_rate": 0.0001645684210526316, + "logits/chosen": 13.249382019042969, + "logits/rejected": 13.249382019042969, + "logps/chosen": -5175.990234375, + "logps/rejected": -5175.990234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6605834960938, + "rewards/margins": 0.0, + "rewards/rejected": -514.6605834960938, + "step": 1691 + }, + { + "epoch": 17.810526315789474, + "grad_norm": 1.1765446288336534e-06, + "learning_rate": 0.00016454736842105264, + "logits/chosen": 13.194196701049805, + "logits/rejected": 13.194196701049805, + "logps/chosen": -3998.376953125, + "logps/rejected": -3998.376953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0103759765625, + "rewards/margins": 0.0, + "rewards/rejected": -397.0103759765625, + "step": 1692 + }, + { + "epoch": 17.821052631578947, + "grad_norm": 1.171123130916385e-06, + "learning_rate": 0.0001645263157894737, + "logits/chosen": 13.194611549377441, + "logits/rejected": 13.194611549377441, + "logps/chosen": -3998.416015625, + "logps/rejected": -3998.416015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0142822265625, + "rewards/margins": 0.0, + "rewards/rejected": -397.0142822265625, + "step": 1693 + }, + { + "epoch": 17.83157894736842, + "grad_norm": 1.1897999456778052e-06, + "learning_rate": 0.00016450526315789474, + "logits/chosen": 13.242565155029297, + "logits/rejected": 13.242565155029297, + "logps/chosen": -4881.2607421875, + "logps/rejected": -4881.2607421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2789001464844, + "rewards/margins": 0.0, + "rewards/rejected": -485.2789001464844, + "step": 1694 + }, + { + "epoch": 17.842105263157894, + "grad_norm": 1.289170654672489e-06, + "learning_rate": 0.0001644842105263158, + "logits/chosen": 13.207611083984375, + "logits/rejected": 13.207611083984375, + "logps/chosen": -3779.822265625, + "logps/rejected": -3779.822265625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.1131286621094, + "rewards/margins": 0.0, + "rewards/rejected": -375.1131286621094, + "step": 1695 + }, + { + "epoch": 17.852631578947367, + "grad_norm": 1.3572716852650046e-06, + "learning_rate": 0.00016446315789473686, + "logits/chosen": 13.207937240600586, + "logits/rejected": 13.207937240600586, + "logps/chosen": -4289.74853515625, + "logps/rejected": -4289.74853515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.177490234375, + "rewards/margins": 0.0, + "rewards/rejected": -426.177490234375, + "step": 1696 + }, + { + "epoch": 17.863157894736844, + "grad_norm": 1.566055289003998e-06, + "learning_rate": 0.0001644421052631579, + "logits/chosen": 13.238598823547363, + "logits/rejected": 13.238598823547363, + "logps/chosen": -5175.99609375, + "logps/rejected": -5175.99609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6611938476562, + "rewards/margins": 0.0, + "rewards/rejected": -514.6611938476562, + "step": 1697 + }, + { + "epoch": 17.873684210526317, + "grad_norm": 1.6482883893331746e-06, + "learning_rate": 0.00016442105263157896, + "logits/chosen": 13.185717582702637, + "logits/rejected": 13.185717582702637, + "logps/chosen": -3758.48828125, + "logps/rejected": -3758.48828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9305114746094, + "rewards/margins": 0.0, + "rewards/rejected": -372.9305114746094, + "step": 1698 + }, + { + "epoch": 17.88421052631579, + "grad_norm": 2.168531864299439e-06, + "learning_rate": 0.0001644, + "logits/chosen": 13.209497451782227, + "logits/rejected": 13.209497451782227, + "logps/chosen": -4326.32421875, + "logps/rejected": -4326.32421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.44757080078125, + "rewards/margins": 0.0, + "rewards/rejected": -429.44757080078125, + "step": 1699 + }, + { + "epoch": 17.894736842105264, + "grad_norm": 1.0153877383345389e-06, + "learning_rate": 0.00016437894736842106, + "logits/chosen": 13.223252296447754, + "logits/rejected": 13.223252296447754, + "logps/chosen": -4882.1025390625, + "logps/rejected": -4882.1025390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.36309814453125, + "rewards/margins": 0.0, + "rewards/rejected": -485.36309814453125, + "step": 1700 + }, + { + "epoch": 17.894736842105264, + "eval_logits/chosen": 13.209467887878418, + "eval_logits/rejected": 13.209467887878418, + "eval_logps/chosen": -4311.66064453125, + "eval_logps/rejected": -4311.66064453125, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.26287841796875, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.26287841796875, + "eval_runtime": 4.2898, + "eval_samples_per_second": 2.331, + "eval_steps_per_second": 2.331, + "step": 1700 + }, + { + "epoch": 17.905263157894737, + "grad_norm": 1.3979546338305227e-06, + "learning_rate": 0.0001643578947368421, + "logits/chosen": 13.171424865722656, + "logits/rejected": 13.171424865722656, + "logps/chosen": -3998.767578125, + "logps/rejected": -3998.767578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0494384765625, + "rewards/margins": 0.0, + "rewards/rejected": -397.0494384765625, + "step": 1701 + }, + { + "epoch": 17.91578947368421, + "grad_norm": 9.574965815772885e-07, + "learning_rate": 0.00016433684210526316, + "logits/chosen": 13.180306434631348, + "logits/rejected": 13.180306434631348, + "logps/chosen": -3758.78125, + "logps/rejected": -3758.78125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9598083496094, + "rewards/margins": 0.0, + "rewards/rejected": -372.9598083496094, + "step": 1702 + }, + { + "epoch": 17.926315789473684, + "grad_norm": 2.7593716822593706e-06, + "learning_rate": 0.00016431578947368424, + "logits/chosen": 13.224714279174805, + "logits/rejected": 13.224714279174805, + "logps/chosen": -5175.970703125, + "logps/rejected": -5175.970703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6586303710938, + "rewards/margins": 0.0, + "rewards/rejected": -514.6586303710938, + "step": 1703 + }, + { + "epoch": 17.936842105263157, + "grad_norm": 1.6290850908262655e-06, + "learning_rate": 0.00016429473684210529, + "logits/chosen": 13.161206245422363, + "logits/rejected": 13.161206245422363, + "logps/chosen": -3999.0625, + "logps/rejected": -3999.0625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0789489746094, + "rewards/margins": 0.0, + "rewards/rejected": -397.0789489746094, + "step": 1704 + }, + { + "epoch": 17.94736842105263, + "grad_norm": 1.6467810155518237e-06, + "learning_rate": 0.0001642736842105263, + "logits/chosen": 13.215140342712402, + "logits/rejected": 13.215140342712402, + "logps/chosen": -5176.349609375, + "logps/rejected": -5176.349609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.696533203125, + "rewards/margins": 0.0, + "rewards/rejected": -514.696533203125, + "step": 1705 + }, + { + "epoch": 17.957894736842107, + "grad_norm": 1.444196300326439e-06, + "learning_rate": 0.00016425263157894736, + "logits/chosen": 13.21228313446045, + "logits/rejected": 13.21228313446045, + "logps/chosen": -5176.732421875, + "logps/rejected": -5176.732421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7348022460938, + "rewards/margins": 0.0, + "rewards/rejected": -514.7348022460938, + "step": 1706 + }, + { + "epoch": 17.96842105263158, + "grad_norm": 1.2963780591235263e-06, + "learning_rate": 0.00016423157894736843, + "logits/chosen": 13.174302101135254, + "logits/rejected": 13.174302101135254, + "logps/chosen": -4289.6875, + "logps/rejected": -4289.6875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.17138671875, + "rewards/margins": 0.0, + "rewards/rejected": -426.17138671875, + "step": 1707 + }, + { + "epoch": 17.978947368421053, + "grad_norm": 2.2712229110766202e-06, + "learning_rate": 0.00016421052631578948, + "logits/chosen": 13.200567245483398, + "logits/rejected": 13.200567245483398, + "logps/chosen": -4882.18701171875, + "logps/rejected": -4882.18701171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.3715515136719, + "rewards/margins": 0.0, + "rewards/rejected": -485.3715515136719, + "step": 1708 + }, + { + "epoch": 17.989473684210527, + "grad_norm": 1.8442843838784029e-06, + "learning_rate": 0.00016418947368421053, + "logits/chosen": 13.167006492614746, + "logits/rejected": 13.167006492614746, + "logps/chosen": -3541.791015625, + "logps/rejected": -3541.791015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1532287597656, + "rewards/margins": 0.0, + "rewards/rejected": -351.1532287597656, + "step": 1709 + }, + { + "epoch": 18.0, + "grad_norm": 1.325768153037643e-06, + "learning_rate": 0.00016416842105263158, + "logits/chosen": 13.156643867492676, + "logits/rejected": 13.156643867492676, + "logps/chosen": -3998.87109375, + "logps/rejected": -3998.87109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0597839355469, + "rewards/margins": 0.0, + "rewards/rejected": -397.0597839355469, + "step": 1710 + }, + { + "epoch": 18.010526315789473, + "grad_norm": 1.3008568657824071e-06, + "learning_rate": 0.00016414736842105266, + "logits/chosen": 13.186636924743652, + "logits/rejected": 13.186636924743652, + "logps/chosen": -4289.2861328125, + "logps/rejected": -4289.2861328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.1312561035156, + "rewards/margins": 0.0, + "rewards/rejected": -426.1312561035156, + "step": 1711 + }, + { + "epoch": 18.021052631578947, + "grad_norm": 1.221750153490575e-06, + "learning_rate": 0.00016412631578947368, + "logits/chosen": 13.217198371887207, + "logits/rejected": 13.217198371887207, + "logps/chosen": -4882.2001953125, + "logps/rejected": -4882.2001953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.37286376953125, + "rewards/margins": 0.0, + "rewards/rejected": -485.37286376953125, + "step": 1712 + }, + { + "epoch": 18.03157894736842, + "grad_norm": 9.961817113435245e-07, + "learning_rate": 0.00016410526315789473, + "logits/chosen": 13.184962272644043, + "logits/rejected": 13.184962272644043, + "logps/chosen": -3542.19921875, + "logps/rejected": -3542.19921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.1940612792969, + "rewards/margins": 0.0, + "rewards/rejected": -351.1940612792969, + "step": 1713 + }, + { + "epoch": 18.042105263157893, + "grad_norm": 9.931683280228754e-07, + "learning_rate": 0.0001640842105263158, + "logits/chosen": 13.184737205505371, + "logits/rejected": 13.184737205505371, + "logps/chosen": -3758.923828125, + "logps/rejected": -3758.923828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9740905761719, + "rewards/margins": 0.0, + "rewards/rejected": -372.9740905761719, + "step": 1714 + }, + { + "epoch": 18.05263157894737, + "grad_norm": 2.1246914911898784e-06, + "learning_rate": 0.00016406315789473685, + "logits/chosen": 13.237903594970703, + "logits/rejected": 13.237903594970703, + "logps/chosen": -5178.34765625, + "logps/rejected": -5178.34765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.8963623046875, + "rewards/margins": 0.0, + "rewards/rejected": -514.8963623046875, + "step": 1715 + }, + { + "epoch": 18.063157894736843, + "grad_norm": 9.134220135820215e-07, + "learning_rate": 0.0001640421052631579, + "logits/chosen": 13.179265975952148, + "logits/rejected": 13.179265975952148, + "logps/chosen": -2671.7890625, + "logps/rejected": -2671.7890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.3973083496094, + "rewards/margins": 0.0, + "rewards/rejected": -264.3973083496094, + "step": 1716 + }, + { + "epoch": 18.073684210526316, + "grad_norm": 8.840111718200205e-07, + "learning_rate": 0.00016402105263157895, + "logits/chosen": 13.205976486206055, + "logits/rejected": 13.205976486206055, + "logps/chosen": -3542.48046875, + "logps/rejected": -3542.48046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.22216796875, + "rewards/margins": 0.0, + "rewards/rejected": -351.22216796875, + "step": 1717 + }, + { + "epoch": 18.08421052631579, + "grad_norm": 9.021367759487475e-07, + "learning_rate": 0.000164, + "logits/chosen": 13.190559387207031, + "logits/rejected": 13.190559387207031, + "logps/chosen": -2672.080078125, + "logps/rejected": -2672.080078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4264221191406, + "rewards/margins": 0.0, + "rewards/rejected": -264.4264221191406, + "step": 1718 + }, + { + "epoch": 18.094736842105263, + "grad_norm": 1.8655525764188496e-06, + "learning_rate": 0.00016397894736842105, + "logits/chosen": 13.260146141052246, + "logits/rejected": 13.260146141052246, + "logps/chosen": -5178.826171875, + "logps/rejected": -5178.826171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.9442138671875, + "rewards/margins": 0.0, + "rewards/rejected": -514.9442138671875, + "step": 1719 + }, + { + "epoch": 18.105263157894736, + "grad_norm": 9.820496416068636e-07, + "learning_rate": 0.0001639578947368421, + "logits/chosen": 13.218685150146484, + "logits/rejected": 13.218685150146484, + "logps/chosen": -3759.435546875, + "logps/rejected": -3759.435546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -373.0252380371094, + "rewards/margins": 0.0, + "rewards/rejected": -373.0252380371094, + "step": 1720 + }, + { + "epoch": 18.11578947368421, + "grad_norm": 1.2514809668573434e-06, + "learning_rate": 0.00016393684210526318, + "logits/chosen": 13.209930419921875, + "logits/rejected": 13.209930419921875, + "logps/chosen": -3998.107421875, + "logps/rejected": -3998.107421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9834289550781, + "rewards/margins": 0.0, + "rewards/rejected": -396.9834289550781, + "step": 1721 + }, + { + "epoch": 18.126315789473683, + "grad_norm": 8.620293101557763e-07, + "learning_rate": 0.00016391578947368423, + "logits/chosen": 13.206565856933594, + "logits/rejected": 13.206565856933594, + "logps/chosen": -2672.3046875, + "logps/rejected": -2672.3046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4488830566406, + "rewards/margins": 0.0, + "rewards/rejected": -264.4488830566406, + "step": 1722 + }, + { + "epoch": 18.13684210526316, + "grad_norm": 9.048561082636297e-07, + "learning_rate": 0.00016389473684210528, + "logits/chosen": 13.205591201782227, + "logits/rejected": 13.205591201782227, + "logps/chosen": -2672.458984375, + "logps/rejected": -2672.458984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.4643249511719, + "rewards/margins": 0.0, + "rewards/rejected": -264.4643249511719, + "step": 1723 + }, + { + "epoch": 18.147368421052633, + "grad_norm": 1.1921965779038146e-06, + "learning_rate": 0.00016387368421052632, + "logits/chosen": 13.20761775970459, + "logits/rejected": 13.20761775970459, + "logps/chosen": -3997.89453125, + "logps/rejected": -3997.89453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9621276855469, + "rewards/margins": 0.0, + "rewards/rejected": -396.9621276855469, + "step": 1724 + }, + { + "epoch": 18.157894736842106, + "grad_norm": 1.7906684206536738e-06, + "learning_rate": 0.00016385263157894737, + "logits/chosen": 13.215581893920898, + "logits/rejected": 13.215581893920898, + "logps/chosen": -3759.201171875, + "logps/rejected": -3759.201171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -373.0018005371094, + "rewards/margins": 0.0, + "rewards/rejected": -373.0018005371094, + "step": 1725 + }, + { + "epoch": 18.16842105263158, + "grad_norm": 1.5402041526613175e-06, + "learning_rate": 0.00016383157894736842, + "logits/chosen": 13.25319766998291, + "logits/rejected": 13.25319766998291, + "logps/chosen": -4881.15185546875, + "logps/rejected": -4881.15185546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.2680358886719, + "rewards/margins": 0.0, + "rewards/rejected": -485.2680358886719, + "step": 1726 + }, + { + "epoch": 18.178947368421053, + "grad_norm": 1.3679416497325292e-06, + "learning_rate": 0.00016381052631578947, + "logits/chosen": 13.223785400390625, + "logits/rejected": 13.223785400390625, + "logps/chosen": -4289.30859375, + "logps/rejected": -4289.30859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.1335144042969, + "rewards/margins": 0.0, + "rewards/rejected": -426.1335144042969, + "step": 1727 + }, + { + "epoch": 18.189473684210526, + "grad_norm": 8.217092499762657e-07, + "learning_rate": 0.00016378947368421055, + "logits/chosen": 13.189509391784668, + "logits/rejected": 13.189509391784668, + "logps/chosen": -2673.619140625, + "logps/rejected": -2673.619140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.580322265625, + "rewards/margins": 0.0, + "rewards/rejected": -264.580322265625, + "step": 1728 + }, + { + "epoch": 18.2, + "grad_norm": 1.1977446092714672e-06, + "learning_rate": 0.0001637684210526316, + "logits/chosen": 13.188858985900879, + "logits/rejected": 13.188858985900879, + "logps/chosen": -3998.232421875, + "logps/rejected": -3998.232421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9959411621094, + "rewards/margins": 0.0, + "rewards/rejected": -396.9959411621094, + "step": 1729 + }, + { + "epoch": 18.210526315789473, + "grad_norm": 9.371688065584749e-07, + "learning_rate": 0.00016374736842105265, + "logits/chosen": 13.200185775756836, + "logits/rejected": 13.200185775756836, + "logps/chosen": -3543.51953125, + "logps/rejected": -3543.51953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3260803222656, + "rewards/margins": 0.0, + "rewards/rejected": -351.3260803222656, + "step": 1730 + }, + { + "epoch": 18.221052631578946, + "grad_norm": 9.086565455618256e-07, + "learning_rate": 0.0001637263157894737, + "logits/chosen": 13.195902824401855, + "logits/rejected": 13.195902824401855, + "logps/chosen": -3543.580078125, + "logps/rejected": -3543.580078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3321228027344, + "rewards/margins": 0.0, + "rewards/rejected": -351.3321228027344, + "step": 1731 + }, + { + "epoch": 18.231578947368423, + "grad_norm": 8.61796252138447e-07, + "learning_rate": 0.00016370526315789475, + "logits/chosen": 13.194711685180664, + "logits/rejected": 13.194711685180664, + "logps/chosen": -3543.75390625, + "logps/rejected": -3543.75390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3495178222656, + "rewards/margins": 0.0, + "rewards/rejected": -351.3495178222656, + "step": 1732 + }, + { + "epoch": 18.242105263157896, + "grad_norm": 8.233304811255948e-07, + "learning_rate": 0.0001636842105263158, + "logits/chosen": 13.19621467590332, + "logits/rejected": 13.19621467590332, + "logps/chosen": -3544.06640625, + "logps/rejected": -3544.06640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3807678222656, + "rewards/margins": 0.0, + "rewards/rejected": -351.3807678222656, + "step": 1733 + }, + { + "epoch": 18.25263157894737, + "grad_norm": 1.9305980458739214e-06, + "learning_rate": 0.00016366315789473684, + "logits/chosen": 13.24044418334961, + "logits/rejected": 13.24044418334961, + "logps/chosen": -5178.00537109375, + "logps/rejected": -5178.00537109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.8621215820312, + "rewards/margins": 0.0, + "rewards/rejected": -514.8621215820312, + "step": 1734 + }, + { + "epoch": 18.263157894736842, + "grad_norm": 1.5832422377570765e-06, + "learning_rate": 0.00016364210526315792, + "logits/chosen": 13.184294700622559, + "logits/rejected": 13.184294700622559, + "logps/chosen": -3998.580078125, + "logps/rejected": -3998.580078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.03070068359375, + "rewards/margins": 0.0, + "rewards/rejected": -397.03070068359375, + "step": 1735 + }, + { + "epoch": 18.273684210526316, + "grad_norm": 1.7293010614594095e-06, + "learning_rate": 0.00016362105263157897, + "logits/chosen": 13.207387924194336, + "logits/rejected": 13.207387924194336, + "logps/chosen": -4289.455078125, + "logps/rejected": -4289.455078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.1481628417969, + "rewards/margins": 0.0, + "rewards/rejected": -426.1481628417969, + "step": 1736 + }, + { + "epoch": 18.28421052631579, + "grad_norm": 1.5562432054139208e-06, + "learning_rate": 0.0001636, + "logits/chosen": 13.177202224731445, + "logits/rejected": 13.177202224731445, + "logps/chosen": -2967.9921875, + "logps/rejected": -2967.9921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0730285644531, + "rewards/margins": 0.0, + "rewards/rejected": -294.0730285644531, + "step": 1737 + }, + { + "epoch": 18.294736842105262, + "grad_norm": 1.3520865422833594e-06, + "learning_rate": 0.00016357894736842104, + "logits/chosen": 13.23664665222168, + "logits/rejected": 13.23664665222168, + "logps/chosen": -5177.64599609375, + "logps/rejected": -5177.64599609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.826171875, + "rewards/margins": 0.0, + "rewards/rejected": -514.826171875, + "step": 1738 + }, + { + "epoch": 18.305263157894736, + "grad_norm": 9.935459956977866e-07, + "learning_rate": 0.00016355789473684212, + "logits/chosen": 13.196292877197266, + "logits/rejected": 13.196292877197266, + "logps/chosen": -3544.83984375, + "logps/rejected": -3544.83984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4580993652344, + "rewards/margins": 0.0, + "rewards/rejected": -351.4580993652344, + "step": 1739 + }, + { + "epoch": 18.31578947368421, + "grad_norm": 1.3221443850852665e-06, + "learning_rate": 0.00016353684210526317, + "logits/chosen": 13.234463691711426, + "logits/rejected": 13.234463691711426, + "logps/chosen": -4879.3125, + "logps/rejected": -4879.3125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0840759277344, + "rewards/margins": 0.0, + "rewards/rejected": -485.0840759277344, + "step": 1740 + }, + { + "epoch": 18.326315789473686, + "grad_norm": 1.1604338396864478e-06, + "learning_rate": 0.00016351578947368422, + "logits/chosen": 13.18543529510498, + "logits/rejected": 13.18543529510498, + "logps/chosen": -3998.95703125, + "logps/rejected": -3998.95703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0683898925781, + "rewards/margins": 0.0, + "rewards/rejected": -397.0683898925781, + "step": 1741 + }, + { + "epoch": 18.33684210526316, + "grad_norm": 1.1535853445820976e-06, + "learning_rate": 0.00016349473684210527, + "logits/chosen": 13.185277938842773, + "logits/rejected": 13.185277938842773, + "logps/chosen": -3999.001953125, + "logps/rejected": -3999.001953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0728759765625, + "rewards/margins": 0.0, + "rewards/rejected": -397.0728759765625, + "step": 1742 + }, + { + "epoch": 18.347368421052632, + "grad_norm": 1.194940750792739e-06, + "learning_rate": 0.00016347368421052634, + "logits/chosen": 13.195455551147461, + "logits/rejected": 13.195455551147461, + "logps/chosen": -3759.8857421875, + "logps/rejected": -3759.8857421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -373.0702819824219, + "rewards/margins": 0.0, + "rewards/rejected": -373.0702819824219, + "step": 1743 + }, + { + "epoch": 18.357894736842105, + "grad_norm": 1.3847267155142617e-06, + "learning_rate": 0.00016345263157894736, + "logits/chosen": 13.179770469665527, + "logits/rejected": 13.179770469665527, + "logps/chosen": -2968.576171875, + "logps/rejected": -2968.576171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1314392089844, + "rewards/margins": 0.0, + "rewards/rejected": -294.1314392089844, + "step": 1744 + }, + { + "epoch": 18.36842105263158, + "grad_norm": 1.594595460119308e-06, + "learning_rate": 0.0001634315789473684, + "logits/chosen": 13.238062858581543, + "logits/rejected": 13.238062858581543, + "logps/chosen": -5177.8115234375, + "logps/rejected": -5177.8115234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.8427124023438, + "rewards/margins": 0.0, + "rewards/rejected": -514.8427124023438, + "step": 1745 + }, + { + "epoch": 18.378947368421052, + "grad_norm": 1.6247003031821805e-06, + "learning_rate": 0.0001634105263157895, + "logits/chosen": 13.17969036102295, + "logits/rejected": 13.17969036102295, + "logps/chosen": -3999.08984375, + "logps/rejected": -3999.08984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0816650390625, + "rewards/margins": 0.0, + "rewards/rejected": -397.0816650390625, + "step": 1746 + }, + { + "epoch": 18.389473684210525, + "grad_norm": 7.692754593335849e-07, + "learning_rate": 0.00016338947368421054, + "logits/chosen": 13.195625305175781, + "logits/rejected": 13.195625305175781, + "logps/chosen": -3545.607421875, + "logps/rejected": -3545.607421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5348815917969, + "rewards/margins": 0.0, + "rewards/rejected": -351.5348815917969, + "step": 1747 + }, + { + "epoch": 18.4, + "grad_norm": 1.3056379657427897e-06, + "learning_rate": 0.0001633684210526316, + "logits/chosen": 13.214991569519043, + "logits/rejected": 13.214991569519043, + "logps/chosen": -4326.58203125, + "logps/rejected": -4326.58203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4733581542969, + "rewards/margins": 0.0, + "rewards/rejected": -429.4733581542969, + "step": 1748 + }, + { + "epoch": 18.410526315789475, + "grad_norm": 1.0942079597953125e-06, + "learning_rate": 0.00016334736842105264, + "logits/chosen": 13.174125671386719, + "logits/rejected": 13.174125671386719, + "logps/chosen": -2968.7578125, + "logps/rejected": -2968.7578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.14959716796875, + "rewards/margins": 0.0, + "rewards/rejected": -294.14959716796875, + "step": 1749 + }, + { + "epoch": 18.42105263157895, + "grad_norm": 2.1360276605264517e-06, + "learning_rate": 0.0001633263157894737, + "logits/chosen": 13.196590423583984, + "logits/rejected": 13.196590423583984, + "logps/chosen": -4289.212890625, + "logps/rejected": -4289.212890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.1239318847656, + "rewards/margins": 0.0, + "rewards/rejected": -426.1239318847656, + "step": 1750 + }, + { + "epoch": 18.42105263157895, + "eval_logits/chosen": 13.2058687210083, + "eval_logits/rejected": 13.2058687210083, + "eval_logps/chosen": -4311.08837890625, + "eval_logps/rejected": -4311.08837890625, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.20556640625, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.20556640625, + "eval_runtime": 4.2818, + "eval_samples_per_second": 2.335, + "eval_steps_per_second": 2.335, + "step": 1750 + }, + { + "epoch": 18.431578947368422, + "grad_norm": 1.6378935470129363e-06, + "learning_rate": 0.00016330526315789474, + "logits/chosen": 13.220552444458008, + "logits/rejected": 13.220552444458008, + "logps/chosen": -4879.091796875, + "logps/rejected": -4879.091796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.06201171875, + "rewards/margins": 0.0, + "rewards/rejected": -485.06201171875, + "step": 1751 + }, + { + "epoch": 18.442105263157895, + "grad_norm": 1.196119342239399e-06, + "learning_rate": 0.00016328421052631579, + "logits/chosen": 13.164741516113281, + "logits/rejected": 13.164741516113281, + "logps/chosen": -3999.990234375, + "logps/rejected": -3999.990234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1717224121094, + "rewards/margins": 0.0, + "rewards/rejected": -397.1717224121094, + "step": 1752 + }, + { + "epoch": 18.45263157894737, + "grad_norm": 1.747448095557047e-06, + "learning_rate": 0.00016326315789473686, + "logits/chosen": 13.20039176940918, + "logits/rejected": 13.20039176940918, + "logps/chosen": -4327.0859375, + "logps/rejected": -4327.0859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.52374267578125, + "rewards/margins": 0.0, + "rewards/rejected": -429.52374267578125, + "step": 1753 + }, + { + "epoch": 18.46315789473684, + "grad_norm": 1.3808701169182314e-06, + "learning_rate": 0.0001632421052631579, + "logits/chosen": 13.217162132263184, + "logits/rejected": 13.217162132263184, + "logps/chosen": -5177.5419921875, + "logps/rejected": -5177.5419921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.8157958984375, + "rewards/margins": 0.0, + "rewards/rejected": -514.8157958984375, + "step": 1754 + }, + { + "epoch": 18.473684210526315, + "grad_norm": 1.3913941074861214e-06, + "learning_rate": 0.00016322105263157896, + "logits/chosen": 13.216073989868164, + "logits/rejected": 13.216073989868164, + "logps/chosen": -5177.658203125, + "logps/rejected": -5177.658203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.827392578125, + "rewards/margins": 0.0, + "rewards/rejected": -514.827392578125, + "step": 1755 + }, + { + "epoch": 18.48421052631579, + "grad_norm": 1.474475766372052e-06, + "learning_rate": 0.0001632, + "logits/chosen": 13.159319877624512, + "logits/rejected": 13.159319877624512, + "logps/chosen": -2969.203125, + "logps/rejected": -2969.203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1941223144531, + "rewards/margins": 0.0, + "rewards/rejected": -294.1941223144531, + "step": 1756 + }, + { + "epoch": 18.49473684210526, + "grad_norm": 1.266942490474321e-06, + "learning_rate": 0.00016317894736842106, + "logits/chosen": 13.214132308959961, + "logits/rejected": 13.214132308959961, + "logps/chosen": -4879.36767578125, + "logps/rejected": -4879.36767578125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.089599609375, + "rewards/margins": 0.0, + "rewards/rejected": -485.089599609375, + "step": 1757 + }, + { + "epoch": 18.50526315789474, + "grad_norm": 1.2730267826555064e-06, + "learning_rate": 0.0001631578947368421, + "logits/chosen": 13.20785140991211, + "logits/rejected": 13.20785140991211, + "logps/chosen": -4328.140625, + "logps/rejected": -4328.140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.62921142578125, + "rewards/margins": 0.0, + "rewards/rejected": -429.62921142578125, + "step": 1758 + }, + { + "epoch": 18.51578947368421, + "grad_norm": 8.870155170370708e-07, + "learning_rate": 0.00016313684210526316, + "logits/chosen": 13.171010971069336, + "logits/rejected": 13.171010971069336, + "logps/chosen": -2673.05859375, + "logps/rejected": -2673.05859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5242614746094, + "rewards/margins": 0.0, + "rewards/rejected": -264.5242614746094, + "step": 1759 + }, + { + "epoch": 18.526315789473685, + "grad_norm": 8.619770142104244e-07, + "learning_rate": 0.00016311578947368423, + "logits/chosen": 13.196556091308594, + "logits/rejected": 13.196556091308594, + "logps/chosen": -3545.423828125, + "logps/rejected": -3545.423828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5165100097656, + "rewards/margins": 0.0, + "rewards/rejected": -351.5165100097656, + "step": 1760 + }, + { + "epoch": 18.53684210526316, + "grad_norm": 1.8582466054795077e-06, + "learning_rate": 0.00016309473684210528, + "logits/chosen": 13.205060958862305, + "logits/rejected": 13.205060958862305, + "logps/chosen": -3775.66015625, + "logps/rejected": -3775.66015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.6968994140625, + "rewards/margins": 0.0, + "rewards/rejected": -374.6968994140625, + "step": 1761 + }, + { + "epoch": 18.54736842105263, + "grad_norm": 8.518305776306079e-07, + "learning_rate": 0.00016307368421052633, + "logits/chosen": 13.191962242126465, + "logits/rejected": 13.191962242126465, + "logps/chosen": -2969.541015625, + "logps/rejected": -2969.541015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.2279052734375, + "rewards/margins": 0.0, + "rewards/rejected": -294.2279052734375, + "step": 1762 + }, + { + "epoch": 18.557894736842105, + "grad_norm": 8.548731216251326e-07, + "learning_rate": 0.00016305263157894735, + "logits/chosen": 13.211217880249023, + "logits/rejected": 13.211217880249023, + "logps/chosen": -3545.353515625, + "logps/rejected": -3545.353515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5094909667969, + "rewards/margins": 0.0, + "rewards/rejected": -351.5094909667969, + "step": 1763 + }, + { + "epoch": 18.568421052631578, + "grad_norm": 1.6722326563467504e-06, + "learning_rate": 0.00016303157894736843, + "logits/chosen": 13.219381332397461, + "logits/rejected": 13.219381332397461, + "logps/chosen": -3776.5546875, + "logps/rejected": -3776.5546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.7863464355469, + "rewards/margins": 0.0, + "rewards/rejected": -374.7863464355469, + "step": 1764 + }, + { + "epoch": 18.57894736842105, + "grad_norm": 1.6722440250305226e-06, + "learning_rate": 0.00016301052631578948, + "logits/chosen": 13.255590438842773, + "logits/rejected": 13.255590438842773, + "logps/chosen": -4879.55859375, + "logps/rejected": -4879.55859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.10870361328125, + "rewards/margins": 0.0, + "rewards/rejected": -485.10870361328125, + "step": 1765 + }, + { + "epoch": 18.589473684210525, + "grad_norm": 1.3035553365625674e-06, + "learning_rate": 0.00016298947368421053, + "logits/chosen": 13.228759765625, + "logits/rejected": 13.228759765625, + "logps/chosen": -3777.3603515625, + "logps/rejected": -3777.3603515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8669128417969, + "rewards/margins": 0.0, + "rewards/rejected": -374.8669128417969, + "step": 1766 + }, + { + "epoch": 18.6, + "grad_norm": 1.1575800726859597e-06, + "learning_rate": 0.0001629684210526316, + "logits/chosen": 13.207669258117676, + "logits/rejected": 13.207669258117676, + "logps/chosen": -3998.458984375, + "logps/rejected": -3998.458984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0185852050781, + "rewards/margins": 0.0, + "rewards/rejected": -397.0185852050781, + "step": 1767 + }, + { + "epoch": 18.610526315789475, + "grad_norm": 1.2824010582335177e-06, + "learning_rate": 0.00016294736842105265, + "logits/chosen": 13.207022666931152, + "logits/rejected": 13.207022666931152, + "logps/chosen": -3998.443359375, + "logps/rejected": -3998.443359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.01702880859375, + "rewards/margins": 0.0, + "rewards/rejected": -397.01702880859375, + "step": 1768 + }, + { + "epoch": 18.621052631578948, + "grad_norm": 1.4964734873501584e-06, + "learning_rate": 0.00016292631578947368, + "logits/chosen": 13.224481582641602, + "logits/rejected": 13.224481582641602, + "logps/chosen": -3545.59375, + "logps/rejected": -3545.59375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.53350830078125, + "rewards/margins": 0.0, + "rewards/rejected": -351.53350830078125, + "step": 1769 + }, + { + "epoch": 18.63157894736842, + "grad_norm": 1.755260882418952e-06, + "learning_rate": 0.00016290526315789473, + "logits/chosen": 13.260469436645508, + "logits/rejected": 13.260469436645508, + "logps/chosen": -4880.009765625, + "logps/rejected": -4880.009765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.15380859375, + "rewards/margins": 0.0, + "rewards/rejected": -485.15380859375, + "step": 1770 + }, + { + "epoch": 18.642105263157895, + "grad_norm": 1.6575957033637678e-06, + "learning_rate": 0.0001628842105263158, + "logits/chosen": 13.268829345703125, + "logits/rejected": 13.268829345703125, + "logps/chosen": -5176.8935546875, + "logps/rejected": -5176.8935546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7509155273438, + "rewards/margins": 0.0, + "rewards/rejected": -514.7509155273438, + "step": 1771 + }, + { + "epoch": 18.652631578947368, + "grad_norm": 1.2034778364977683e-06, + "learning_rate": 0.00016286315789473685, + "logits/chosen": 13.264095306396484, + "logits/rejected": 13.264095306396484, + "logps/chosen": -4880.46630859375, + "logps/rejected": -4880.46630859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.199462890625, + "rewards/margins": 0.0, + "rewards/rejected": -485.199462890625, + "step": 1772 + }, + { + "epoch": 18.66315789473684, + "grad_norm": 1.132873308051785e-06, + "learning_rate": 0.0001628421052631579, + "logits/chosen": 13.208897590637207, + "logits/rejected": 13.208897590637207, + "logps/chosen": -3999.048828125, + "logps/rejected": -3999.048828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.07757568359375, + "rewards/margins": 0.0, + "rewards/rejected": -397.07757568359375, + "step": 1773 + }, + { + "epoch": 18.673684210526314, + "grad_norm": 1.4973380757510313e-06, + "learning_rate": 0.00016282105263157895, + "logits/chosen": 13.271554946899414, + "logits/rejected": 13.271554946899414, + "logps/chosen": -5176.4873046875, + "logps/rejected": -5176.4873046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7103271484375, + "rewards/margins": 0.0, + "rewards/rejected": -514.7103271484375, + "step": 1774 + }, + { + "epoch": 18.68421052631579, + "grad_norm": 8.517868650415039e-07, + "learning_rate": 0.0001628, + "logits/chosen": 13.207213401794434, + "logits/rejected": 13.207213401794434, + "logps/chosen": -2673.1953125, + "logps/rejected": -2673.1953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5379333496094, + "rewards/margins": 0.0, + "rewards/rejected": -264.5379333496094, + "step": 1775 + }, + { + "epoch": 18.694736842105264, + "grad_norm": 1.6368485376005992e-06, + "learning_rate": 0.00016277894736842105, + "logits/chosen": 13.270036697387695, + "logits/rejected": 13.270036697387695, + "logps/chosen": -5176.90673828125, + "logps/rejected": -5176.90673828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7522583007812, + "rewards/margins": 0.0, + "rewards/rejected": -514.7522583007812, + "step": 1776 + }, + { + "epoch": 18.705263157894738, + "grad_norm": 1.1331502491884748e-06, + "learning_rate": 0.0001627578947368421, + "logits/chosen": 13.205471992492676, + "logits/rejected": 13.205471992492676, + "logps/chosen": -3999.43359375, + "logps/rejected": -3999.43359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1160583496094, + "rewards/margins": 0.0, + "rewards/rejected": -397.1160583496094, + "step": 1777 + }, + { + "epoch": 18.71578947368421, + "grad_norm": 9.49476543610217e-07, + "learning_rate": 0.00016273684210526317, + "logits/chosen": 13.22486686706543, + "logits/rejected": 13.22486686706543, + "logps/chosen": -3544.505859375, + "logps/rejected": -3544.505859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.4247131347656, + "rewards/margins": 0.0, + "rewards/rejected": -351.4247131347656, + "step": 1778 + }, + { + "epoch": 18.726315789473684, + "grad_norm": 1.0547296369622927e-06, + "learning_rate": 0.00016271578947368422, + "logits/chosen": 13.25903034210205, + "logits/rejected": 13.25903034210205, + "logps/chosen": -4881.5244140625, + "logps/rejected": -4881.5244140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.3052673339844, + "rewards/margins": 0.0, + "rewards/rejected": -485.3052673339844, + "step": 1779 + }, + { + "epoch": 18.736842105263158, + "grad_norm": 1.4396492815649253e-06, + "learning_rate": 0.00016269473684210527, + "logits/chosen": 13.228307723999023, + "logits/rejected": 13.228307723999023, + "logps/chosen": -4287.298828125, + "logps/rejected": -4287.298828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.9325256347656, + "rewards/margins": 0.0, + "rewards/rejected": -425.9325256347656, + "step": 1780 + }, + { + "epoch": 18.74736842105263, + "grad_norm": 2.24858717956522e-06, + "learning_rate": 0.00016267368421052632, + "logits/chosen": 13.241291046142578, + "logits/rejected": 13.241291046142578, + "logps/chosen": -4327.0, + "logps/rejected": -4327.0, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.51513671875, + "rewards/margins": 0.0, + "rewards/rejected": -429.51513671875, + "step": 1781 + }, + { + "epoch": 18.757894736842104, + "grad_norm": 1.0807607395690866e-06, + "learning_rate": 0.00016265263157894737, + "logits/chosen": 13.196743965148926, + "logits/rejected": 13.196743965148926, + "logps/chosen": -3999.537109375, + "logps/rejected": -3999.537109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.12640380859375, + "rewards/margins": 0.0, + "rewards/rejected": -397.12640380859375, + "step": 1782 + }, + { + "epoch": 18.768421052631577, + "grad_norm": 1.2052894362568622e-06, + "learning_rate": 0.00016263157894736842, + "logits/chosen": 13.218193054199219, + "logits/rejected": 13.218193054199219, + "logps/chosen": -3779.65625, + "logps/rejected": -3779.65625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.0965270996094, + "rewards/margins": 0.0, + "rewards/rejected": -375.0965270996094, + "step": 1783 + }, + { + "epoch": 18.778947368421054, + "grad_norm": 1.1812438742708764e-06, + "learning_rate": 0.00016261052631578947, + "logits/chosen": 13.214422225952148, + "logits/rejected": 13.214422225952148, + "logps/chosen": -3779.775390625, + "logps/rejected": -3779.775390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.1084289550781, + "rewards/margins": 0.0, + "rewards/rejected": -375.1084289550781, + "step": 1784 + }, + { + "epoch": 18.789473684210527, + "grad_norm": 8.895038376977027e-07, + "learning_rate": 0.00016258947368421055, + "logits/chosen": 13.208364486694336, + "logits/rejected": 13.208364486694336, + "logps/chosen": -3544.01953125, + "logps/rejected": -3544.01953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3760681152344, + "rewards/margins": 0.0, + "rewards/rejected": -351.3760681152344, + "step": 1785 + }, + { + "epoch": 18.8, + "grad_norm": 1.5744819847895997e-06, + "learning_rate": 0.0001625684210526316, + "logits/chosen": 13.2423095703125, + "logits/rejected": 13.2423095703125, + "logps/chosen": -4881.88037109375, + "logps/rejected": -4881.88037109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.34088134765625, + "rewards/margins": 0.0, + "rewards/rejected": -485.34088134765625, + "step": 1786 + }, + { + "epoch": 18.810526315789474, + "grad_norm": 1.2158124036432127e-06, + "learning_rate": 0.00016254736842105265, + "logits/chosen": 13.199462890625, + "logits/rejected": 13.199462890625, + "logps/chosen": -3757.240234375, + "logps/rejected": -3757.240234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.80572509765625, + "rewards/margins": 0.0, + "rewards/rejected": -372.80572509765625, + "step": 1787 + }, + { + "epoch": 18.821052631578947, + "grad_norm": 1.4959591680963058e-06, + "learning_rate": 0.0001625263157894737, + "logits/chosen": 13.241324424743652, + "logits/rejected": 13.241324424743652, + "logps/chosen": -4882.1474609375, + "logps/rejected": -4882.1474609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.3675842285156, + "rewards/margins": 0.0, + "rewards/rejected": -485.3675842285156, + "step": 1788 + }, + { + "epoch": 18.83157894736842, + "grad_norm": 1.2322303746259422e-06, + "learning_rate": 0.00016250526315789474, + "logits/chosen": 13.193045616149902, + "logits/rejected": 13.193045616149902, + "logps/chosen": -2967.212890625, + "logps/rejected": -2967.212890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -293.9950866699219, + "rewards/margins": 0.0, + "rewards/rejected": -293.9950866699219, + "step": 1789 + }, + { + "epoch": 18.842105263157894, + "grad_norm": 1.706665102574334e-06, + "learning_rate": 0.0001624842105263158, + "logits/chosen": 13.215299606323242, + "logits/rejected": 13.215299606323242, + "logps/chosen": -4288.0556640625, + "logps/rejected": -4288.0556640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0082092285156, + "rewards/margins": 0.0, + "rewards/rejected": -426.0082092285156, + "step": 1790 + }, + { + "epoch": 18.852631578947367, + "grad_norm": 1.2062840824000887e-06, + "learning_rate": 0.00016246315789473684, + "logits/chosen": 13.213371276855469, + "logits/rejected": 13.213371276855469, + "logps/chosen": -3781.076171875, + "logps/rejected": -3781.076171875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.2384948730469, + "rewards/margins": 0.0, + "rewards/rejected": -375.2384948730469, + "step": 1791 + }, + { + "epoch": 18.863157894736844, + "grad_norm": 1.1165700470883166e-06, + "learning_rate": 0.00016244210526315792, + "logits/chosen": 13.188092231750488, + "logits/rejected": 13.188092231750488, + "logps/chosen": -3999.748046875, + "logps/rejected": -3999.748046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1474914550781, + "rewards/margins": 0.0, + "rewards/rejected": -397.1474914550781, + "step": 1792 + }, + { + "epoch": 18.873684210526317, + "grad_norm": 8.589752837906417e-07, + "learning_rate": 0.00016242105263157897, + "logits/chosen": 13.185833930969238, + "logits/rejected": 13.185833930969238, + "logps/chosen": -2672.76953125, + "logps/rejected": -2672.76953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.495361328125, + "rewards/margins": 0.0, + "rewards/rejected": -264.495361328125, + "step": 1793 + }, + { + "epoch": 18.88421052631579, + "grad_norm": 8.52804532769369e-07, + "learning_rate": 0.00016240000000000002, + "logits/chosen": 13.182662010192871, + "logits/rejected": 13.182662010192871, + "logps/chosen": -2672.9296875, + "logps/rejected": -2672.9296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.5113830566406, + "rewards/margins": 0.0, + "rewards/rejected": -264.5113830566406, + "step": 1794 + }, + { + "epoch": 18.894736842105264, + "grad_norm": 1.524874505776097e-06, + "learning_rate": 0.00016237894736842104, + "logits/chosen": 13.193991661071777, + "logits/rejected": 13.193991661071777, + "logps/chosen": -3757.8955078125, + "logps/rejected": -3757.8955078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.8712463378906, + "rewards/margins": 0.0, + "rewards/rejected": -372.8712463378906, + "step": 1795 + }, + { + "epoch": 18.905263157894737, + "grad_norm": 1.5084017377375858e-06, + "learning_rate": 0.00016235789473684212, + "logits/chosen": 13.241434097290039, + "logits/rejected": 13.241434097290039, + "logps/chosen": -5175.3798828125, + "logps/rejected": -5175.3798828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5995483398438, + "rewards/margins": 0.0, + "rewards/rejected": -514.5995483398438, + "step": 1796 + }, + { + "epoch": 18.91578947368421, + "grad_norm": 1.1134924307043548e-06, + "learning_rate": 0.00016233684210526317, + "logits/chosen": 13.200626373291016, + "logits/rejected": 13.200626373291016, + "logps/chosen": -3782.296875, + "logps/rejected": -3782.296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -375.3605651855469, + "rewards/margins": 0.0, + "rewards/rejected": -375.3605651855469, + "step": 1797 + }, + { + "epoch": 18.926315789473684, + "grad_norm": 9.63920228969073e-07, + "learning_rate": 0.00016231578947368421, + "logits/chosen": 13.197031021118164, + "logits/rejected": 13.197031021118164, + "logps/chosen": -3543.177734375, + "logps/rejected": -3543.177734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.2919006347656, + "rewards/margins": 0.0, + "rewards/rejected": -351.2919006347656, + "step": 1798 + }, + { + "epoch": 18.936842105263157, + "grad_norm": 1.1202159839740489e-06, + "learning_rate": 0.0001622947368421053, + "logits/chosen": 13.174903869628906, + "logits/rejected": 13.174903869628906, + "logps/chosen": -3999.482421875, + "logps/rejected": -3999.482421875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1209411621094, + "rewards/margins": 0.0, + "rewards/rejected": -397.1209411621094, + "step": 1799 + }, + { + "epoch": 18.94736842105263, + "grad_norm": 1.1833983535325387e-06, + "learning_rate": 0.00016227368421052634, + "logits/chosen": 13.229084014892578, + "logits/rejected": 13.229084014892578, + "logps/chosen": -4882.0439453125, + "logps/rejected": -4882.0439453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.35723876953125, + "rewards/margins": 0.0, + "rewards/rejected": -485.35723876953125, + "step": 1800 + }, + { + "epoch": 18.94736842105263, + "eval_logits/chosen": 13.212472915649414, + "eval_logits/rejected": 13.212472915649414, + "eval_logps/chosen": -4311.494140625, + "eval_logps/rejected": -4311.494140625, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.2461853027344, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.2461853027344, + "eval_runtime": 4.4235, + "eval_samples_per_second": 2.261, + "eval_steps_per_second": 2.261, + "step": 1800 + }, + { + "epoch": 18.957894736842107, + "grad_norm": 8.207575206142792e-07, + "learning_rate": 0.00016225263157894736, + "logits/chosen": 13.172099113464355, + "logits/rejected": 13.172099113464355, + "logps/chosen": -2674.26953125, + "logps/rejected": -2674.26953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6453552246094, + "rewards/margins": 0.0, + "rewards/rejected": -264.6453552246094, + "step": 1801 + }, + { + "epoch": 18.96842105263158, + "grad_norm": 1.1158606412209338e-06, + "learning_rate": 0.0001622315789473684, + "logits/chosen": 13.22603988647461, + "logits/rejected": 13.22603988647461, + "logps/chosen": -4882.111328125, + "logps/rejected": -4882.111328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.3639831542969, + "rewards/margins": 0.0, + "rewards/rejected": -485.3639831542969, + "step": 1802 + }, + { + "epoch": 18.978947368421053, + "grad_norm": 1.7575513311385293e-06, + "learning_rate": 0.0001622105263157895, + "logits/chosen": 13.214081764221191, + "logits/rejected": 13.214081764221191, + "logps/chosen": -4325.6953125, + "logps/rejected": -4325.6953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.3846740722656, + "rewards/margins": 0.0, + "rewards/rejected": -429.3846740722656, + "step": 1803 + }, + { + "epoch": 18.989473684210527, + "grad_norm": 1.5989957091733231e-06, + "learning_rate": 0.00016218947368421054, + "logits/chosen": 13.23378849029541, + "logits/rejected": 13.23378849029541, + "logps/chosen": -5174.8564453125, + "logps/rejected": -5174.8564453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.5472412109375, + "rewards/margins": 0.0, + "rewards/rejected": -514.5472412109375, + "step": 1804 + }, + { + "epoch": 19.0, + "grad_norm": 1.1448355508036911e-06, + "learning_rate": 0.00016216842105263159, + "logits/chosen": 13.22888469696045, + "logits/rejected": 13.22888469696045, + "logps/chosen": -4882.234375, + "logps/rejected": -4882.234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.37628173828125, + "rewards/margins": 0.0, + "rewards/rejected": -485.37628173828125, + "step": 1805 + }, + { + "epoch": 19.010526315789473, + "grad_norm": 1.2935046243001125e-06, + "learning_rate": 0.00016214736842105264, + "logits/chosen": 13.184165000915527, + "logits/rejected": 13.184165000915527, + "logps/chosen": -2967.94921875, + "logps/rejected": -2967.94921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0687255859375, + "rewards/margins": 0.0, + "rewards/rejected": -294.0687255859375, + "step": 1806 + }, + { + "epoch": 19.021052631578947, + "grad_norm": 1.386531266689417e-06, + "learning_rate": 0.00016212631578947368, + "logits/chosen": 13.227188110351562, + "logits/rejected": 13.227188110351562, + "logps/chosen": -4325.90234375, + "logps/rejected": -4325.90234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4053649902344, + "rewards/margins": 0.0, + "rewards/rejected": -429.4053649902344, + "step": 1807 + }, + { + "epoch": 19.03157894736842, + "grad_norm": 1.4685055020891014e-06, + "learning_rate": 0.00016210526315789473, + "logits/chosen": 13.186352729797363, + "logits/rejected": 13.186352729797363, + "logps/chosen": -3999.365234375, + "logps/rejected": -3999.365234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.1092224121094, + "rewards/margins": 0.0, + "rewards/rejected": -397.1092224121094, + "step": 1808 + }, + { + "epoch": 19.042105263157893, + "grad_norm": 1.2376596032481757e-06, + "learning_rate": 0.00016208421052631578, + "logits/chosen": 13.231396675109863, + "logits/rejected": 13.231396675109863, + "logps/chosen": -4326.2919921875, + "logps/rejected": -4326.2919921875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4443359375, + "rewards/margins": 0.0, + "rewards/rejected": -429.4443359375, + "step": 1809 + }, + { + "epoch": 19.05263157894737, + "grad_norm": 1.2141348406657926e-06, + "learning_rate": 0.00016206315789473686, + "logits/chosen": 13.23031997680664, + "logits/rejected": 13.23031997680664, + "logps/chosen": -4326.7333984375, + "logps/rejected": -4326.7333984375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.4884948730469, + "rewards/margins": 0.0, + "rewards/rejected": -429.4884948730469, + "step": 1810 + }, + { + "epoch": 19.063157894736843, + "grad_norm": 2.3427364794770256e-06, + "learning_rate": 0.0001620421052631579, + "logits/chosen": 13.246783256530762, + "logits/rejected": 13.246783256530762, + "logps/chosen": -5175.3828125, + "logps/rejected": -5175.3828125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.599853515625, + "rewards/margins": 0.0, + "rewards/rejected": -514.599853515625, + "step": 1811 + }, + { + "epoch": 19.073684210526316, + "grad_norm": 7.989654022821924e-07, + "learning_rate": 0.00016202105263157896, + "logits/chosen": 13.186037063598633, + "logits/rejected": 13.186037063598633, + "logps/chosen": -2674.52734375, + "logps/rejected": -2674.52734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.671142578125, + "rewards/margins": 0.0, + "rewards/rejected": -264.671142578125, + "step": 1812 + }, + { + "epoch": 19.08421052631579, + "grad_norm": 1.1549470855243271e-06, + "learning_rate": 0.000162, + "logits/chosen": 13.242344856262207, + "logits/rejected": 13.242344856262207, + "logps/chosen": -4882.19140625, + "logps/rejected": -4882.19140625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.3719787597656, + "rewards/margins": 0.0, + "rewards/rejected": -485.3719787597656, + "step": 1813 + }, + { + "epoch": 19.094736842105263, + "grad_norm": 8.248579774772224e-07, + "learning_rate": 0.00016197894736842106, + "logits/chosen": 13.189043045043945, + "logits/rejected": 13.189043045043945, + "logps/chosen": -2674.630859375, + "logps/rejected": -2674.630859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6814880371094, + "rewards/margins": 0.0, + "rewards/rejected": -264.6814880371094, + "step": 1814 + }, + { + "epoch": 19.105263157894736, + "grad_norm": 1.630377028050134e-06, + "learning_rate": 0.0001619578947368421, + "logits/chosen": 13.254777908325195, + "logits/rejected": 13.254777908325195, + "logps/chosen": -5176.736328125, + "logps/rejected": -5176.736328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7352294921875, + "rewards/margins": 0.0, + "rewards/rejected": -514.7352294921875, + "step": 1815 + }, + { + "epoch": 19.11578947368421, + "grad_norm": 1.6961309938778868e-06, + "learning_rate": 0.00016193684210526316, + "logits/chosen": 13.259284973144531, + "logits/rejected": 13.259284973144531, + "logps/chosen": -5176.9248046875, + "logps/rejected": -5176.9248046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.7540893554688, + "rewards/margins": 0.0, + "rewards/rejected": -514.7540893554688, + "step": 1816 + }, + { + "epoch": 19.126315789473683, + "grad_norm": 1.3150581708032405e-06, + "learning_rate": 0.00016191578947368423, + "logits/chosen": 13.200679779052734, + "logits/rejected": 13.200679779052734, + "logps/chosen": -3998.30859375, + "logps/rejected": -3998.30859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0035400390625, + "rewards/margins": 0.0, + "rewards/rejected": -397.0035400390625, + "step": 1817 + }, + { + "epoch": 19.13684210526316, + "grad_norm": 8.613211548436084e-07, + "learning_rate": 0.00016189473684210528, + "logits/chosen": 13.204448699951172, + "logits/rejected": 13.204448699951172, + "logps/chosen": -2674.6162109375, + "logps/rejected": -2674.6162109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.6800231933594, + "rewards/margins": 0.0, + "rewards/rejected": -264.6800231933594, + "step": 1818 + }, + { + "epoch": 19.147368421052633, + "grad_norm": 1.0789335647132248e-06, + "learning_rate": 0.00016187368421052633, + "logits/chosen": 13.226563453674316, + "logits/rejected": 13.226563453674316, + "logps/chosen": -3544.240234375, + "logps/rejected": -3544.240234375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3981628417969, + "rewards/margins": 0.0, + "rewards/rejected": -351.3981628417969, + "step": 1819 + }, + { + "epoch": 19.157894736842106, + "grad_norm": 9.315833722212119e-07, + "learning_rate": 0.00016185263157894738, + "logits/chosen": 13.230652809143066, + "logits/rejected": 13.230652809143066, + "logps/chosen": -3544.158203125, + "logps/rejected": -3544.158203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.38995361328125, + "rewards/margins": 0.0, + "rewards/rejected": -351.38995361328125, + "step": 1820 + }, + { + "epoch": 19.16842105263158, + "grad_norm": 8.662165100759012e-07, + "learning_rate": 0.00016183157894736843, + "logits/chosen": 13.237730026245117, + "logits/rejected": 13.237730026245117, + "logps/chosen": -3544.154296875, + "logps/rejected": -3544.154296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.3895568847656, + "rewards/margins": 0.0, + "rewards/rejected": -351.3895568847656, + "step": 1821 + }, + { + "epoch": 19.178947368421053, + "grad_norm": 1.006268121273024e-06, + "learning_rate": 0.00016181052631578948, + "logits/chosen": 13.239742279052734, + "logits/rejected": 13.239742279052734, + "logps/chosen": -3758.587890625, + "logps/rejected": -3758.587890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.94049072265625, + "rewards/margins": 0.0, + "rewards/rejected": -372.94049072265625, + "step": 1822 + }, + { + "epoch": 19.189473684210526, + "grad_norm": 1.0211598464593408e-06, + "learning_rate": 0.00016178947368421053, + "logits/chosen": 13.24504566192627, + "logits/rejected": 13.24504566192627, + "logps/chosen": -3758.546875, + "logps/rejected": -3758.546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9363708496094, + "rewards/margins": 0.0, + "rewards/rejected": -372.9363708496094, + "step": 1823 + }, + { + "epoch": 19.2, + "grad_norm": 1.6710969248379115e-06, + "learning_rate": 0.0001617684210526316, + "logits/chosen": 13.280534744262695, + "logits/rejected": 13.280534744262695, + "logps/chosen": -4329.84765625, + "logps/rejected": -4329.84765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -429.7998962402344, + "rewards/margins": 0.0, + "rewards/rejected": -429.7998962402344, + "step": 1824 + }, + { + "epoch": 19.210526315789473, + "grad_norm": 1.0648620900610695e-06, + "learning_rate": 0.00016174736842105265, + "logits/chosen": 13.250589370727539, + "logits/rejected": 13.250589370727539, + "logps/chosen": -3758.7060546875, + "logps/rejected": -3758.7060546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -372.9523010253906, + "rewards/margins": 0.0, + "rewards/rejected": -372.9523010253906, + "step": 1825 + }, + { + "epoch": 19.221052631578946, + "grad_norm": 2.909375325543806e-06, + "learning_rate": 0.00016172631578947368, + "logits/chosen": 13.261029243469238, + "logits/rejected": 13.261029243469238, + "logps/chosen": -4286.9296875, + "logps/rejected": -4286.9296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -425.8955993652344, + "rewards/margins": 0.0, + "rewards/rejected": -425.8955993652344, + "step": 1826 + }, + { + "epoch": 19.231578947368423, + "grad_norm": 8.360383390026982e-07, + "learning_rate": 0.00016170526315789472, + "logits/chosen": 13.229317665100098, + "logits/rejected": 13.229317665100098, + "logps/chosen": -2675.111328125, + "logps/rejected": -2675.111328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.72955322265625, + "rewards/margins": 0.0, + "rewards/rejected": -264.72955322265625, + "step": 1827 + }, + { + "epoch": 19.242105263157896, + "grad_norm": 1.2091257985957782e-06, + "learning_rate": 0.0001616842105263158, + "logits/chosen": 13.21916675567627, + "logits/rejected": 13.21916675567627, + "logps/chosen": -3996.8671875, + "logps/rejected": -3996.8671875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8594055175781, + "rewards/margins": 0.0, + "rewards/rejected": -396.8594055175781, + "step": 1828 + }, + { + "epoch": 19.25263157894737, + "grad_norm": 1.8102182366419584e-06, + "learning_rate": 0.00016166315789473685, + "logits/chosen": 13.21895694732666, + "logits/rejected": 13.21895694732666, + "logps/chosen": -2968.1845703125, + "logps/rejected": -2968.1845703125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.0922546386719, + "rewards/margins": 0.0, + "rewards/rejected": -294.0922546386719, + "step": 1829 + }, + { + "epoch": 19.263157894736842, + "grad_norm": 1.3274924413053668e-06, + "learning_rate": 0.0001616421052631579, + "logits/chosen": 13.228885650634766, + "logits/rejected": 13.228885650634766, + "logps/chosen": -3545.7734375, + "logps/rejected": -3545.7734375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.5514831542969, + "rewards/margins": 0.0, + "rewards/rejected": -351.5514831542969, + "step": 1830 + }, + { + "epoch": 19.273684210526316, + "grad_norm": 1.4063456319490797e-06, + "learning_rate": 0.00016162105263157895, + "logits/chosen": 13.266222953796387, + "logits/rejected": 13.266222953796387, + "logps/chosen": -4879.04736328125, + "logps/rejected": -4879.04736328125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0575866699219, + "rewards/margins": 0.0, + "rewards/rejected": -485.0575866699219, + "step": 1831 + }, + { + "epoch": 19.28421052631579, + "grad_norm": 1.2195123417768627e-06, + "learning_rate": 0.00016160000000000002, + "logits/chosen": 13.20736312866211, + "logits/rejected": 13.20736312866211, + "logps/chosen": -3996.8515625, + "logps/rejected": -3996.8515625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.85784912109375, + "rewards/margins": 0.0, + "rewards/rejected": -396.85784912109375, + "step": 1832 + }, + { + "epoch": 19.294736842105262, + "grad_norm": 1.2364076837911853e-06, + "learning_rate": 0.00016157894736842105, + "logits/chosen": 13.20590877532959, + "logits/rejected": 13.20590877532959, + "logps/chosen": -3996.908203125, + "logps/rejected": -3996.908203125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.8634948730469, + "rewards/margins": 0.0, + "rewards/rejected": -396.8634948730469, + "step": 1833 + }, + { + "epoch": 19.305263157894736, + "grad_norm": 7.871368552514468e-07, + "learning_rate": 0.0001615578947368421, + "logits/chosen": 13.202531814575195, + "logits/rejected": 13.202531814575195, + "logps/chosen": -2675.5341796875, + "logps/rejected": -2675.5341796875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.7718200683594, + "rewards/margins": 0.0, + "rewards/rejected": -264.7718200683594, + "step": 1834 + }, + { + "epoch": 19.31578947368421, + "grad_norm": 1.3683973065781174e-06, + "learning_rate": 0.00016153684210526317, + "logits/chosen": 13.19500732421875, + "logits/rejected": 13.19500732421875, + "logps/chosen": -3998.021484375, + "logps/rejected": -3998.021484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -396.9748229980469, + "rewards/margins": 0.0, + "rewards/rejected": -396.9748229980469, + "step": 1835 + }, + { + "epoch": 19.326315789473686, + "grad_norm": 8.103224331534875e-07, + "learning_rate": 0.00016151578947368422, + "logits/chosen": 13.187421798706055, + "logits/rejected": 13.187421798706055, + "logps/chosen": -2675.7939453125, + "logps/rejected": -2675.7939453125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -264.7978210449219, + "rewards/margins": 0.0, + "rewards/rejected": -264.7978210449219, + "step": 1836 + }, + { + "epoch": 19.33684210526316, + "grad_norm": 7.7311335644481e-07, + "learning_rate": 0.00016149473684210527, + "logits/chosen": 13.199485778808594, + "logits/rejected": 13.199485778808594, + "logps/chosen": -3545.5712890625, + "logps/rejected": -3545.5712890625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.53125, + "rewards/margins": 0.0, + "rewards/rejected": -351.53125, + "step": 1837 + }, + { + "epoch": 19.347368421052632, + "grad_norm": 1.428639848199964e-06, + "learning_rate": 0.00016147368421052632, + "logits/chosen": 13.193339347839355, + "logits/rejected": 13.193339347839355, + "logps/chosen": -3776.21484375, + "logps/rejected": -3776.21484375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.75238037109375, + "rewards/margins": 0.0, + "rewards/rejected": -374.75238037109375, + "step": 1838 + }, + { + "epoch": 19.357894736842105, + "grad_norm": 1.1254073797317687e-06, + "learning_rate": 0.00016145263157894737, + "logits/chosen": 13.160295486450195, + "logits/rejected": 13.160295486450195, + "logps/chosen": -3999.005859375, + "logps/rejected": -3999.005859375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.0732727050781, + "rewards/margins": 0.0, + "rewards/rejected": -397.0732727050781, + "step": 1839 + }, + { + "epoch": 19.36842105263158, + "grad_norm": 1.3588329466074356e-06, + "learning_rate": 0.00016143157894736842, + "logits/chosen": 13.176033020019531, + "logits/rejected": 13.176033020019531, + "logps/chosen": -3777.0634765625, + "logps/rejected": -3777.0634765625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -374.8372497558594, + "rewards/margins": 0.0, + "rewards/rejected": -374.8372497558594, + "step": 1840 + }, + { + "epoch": 19.378947368421052, + "grad_norm": 1.33735113649891e-06, + "learning_rate": 0.00016141052631578947, + "logits/chosen": 13.201398849487305, + "logits/rejected": 13.201398849487305, + "logps/chosen": -4878.69873046875, + "logps/rejected": -4878.69873046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.022705078125, + "rewards/margins": 0.0, + "rewards/rejected": -485.022705078125, + "step": 1841 + }, + { + "epoch": 19.389473684210525, + "grad_norm": 1.5353456319644465e-06, + "learning_rate": 0.00016138947368421054, + "logits/chosen": 13.20130443572998, + "logits/rejected": 13.20130443572998, + "logps/chosen": -5175.6748046875, + "logps/rejected": -5175.6748046875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -514.6290893554688, + "rewards/margins": 0.0, + "rewards/rejected": -514.6290893554688, + "step": 1842 + }, + { + "epoch": 19.4, + "grad_norm": 1.4818235740676755e-06, + "learning_rate": 0.0001613684210526316, + "logits/chosen": 13.18979263305664, + "logits/rejected": 13.18979263305664, + "logps/chosen": -4878.943359375, + "logps/rejected": -4878.943359375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.04718017578125, + "rewards/margins": 0.0, + "rewards/rejected": -485.04718017578125, + "step": 1843 + }, + { + "epoch": 19.410526315789475, + "grad_norm": 1.6471550452479278e-06, + "learning_rate": 0.00016134736842105264, + "logits/chosen": 13.186290740966797, + "logits/rejected": 13.186290740966797, + "logps/chosen": -4879.10546875, + "logps/rejected": -4879.10546875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.0633850097656, + "rewards/margins": 0.0, + "rewards/rejected": -485.0633850097656, + "step": 1844 + }, + { + "epoch": 19.42105263157895, + "grad_norm": 1.7259623064092011e-06, + "learning_rate": 0.0001613263157894737, + "logits/chosen": 13.150799751281738, + "logits/rejected": 13.150799751281738, + "logps/chosen": -4288.2705078125, + "logps/rejected": -4288.2705078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -426.0296936035156, + "rewards/margins": 0.0, + "rewards/rejected": -426.0296936035156, + "step": 1845 + }, + { + "epoch": 19.431578947368422, + "grad_norm": 1.8720892285273294e-06, + "learning_rate": 0.00016130526315789474, + "logits/chosen": 13.13432502746582, + "logits/rejected": 13.13432502746582, + "logps/chosen": -2968.306640625, + "logps/rejected": -2968.306640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -294.1044616699219, + "rewards/margins": 0.0, + "rewards/rejected": -294.1044616699219, + "step": 1846 + }, + { + "epoch": 19.442105263157895, + "grad_norm": 9.840515531323035e-07, + "learning_rate": 0.0001612842105263158, + "logits/chosen": 13.150936126708984, + "logits/rejected": 13.150936126708984, + "logps/chosen": -3544.3681640625, + "logps/rejected": -3544.3681640625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.41094970703125, + "rewards/margins": 0.0, + "rewards/rejected": -351.41094970703125, + "step": 1847 + }, + { + "epoch": 19.45263157894737, + "grad_norm": 1.1795752925536362e-06, + "learning_rate": 0.00016126315789473684, + "logits/chosen": 13.131905555725098, + "logits/rejected": 13.131905555725098, + "logps/chosen": -4000.6953125, + "logps/rejected": -4000.6953125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -397.2422180175781, + "rewards/margins": 0.0, + "rewards/rejected": -397.2422180175781, + "step": 1848 + }, + { + "epoch": 19.46315789473684, + "grad_norm": 1.0176698879149626e-06, + "learning_rate": 0.00016124210526315792, + "logits/chosen": 13.160609245300293, + "logits/rejected": 13.160609245300293, + "logps/chosen": -3544.1650390625, + "logps/rejected": -3544.1650390625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -351.390625, + "rewards/margins": 0.0, + "rewards/rejected": -351.390625, + "step": 1849 + }, + { + "epoch": 19.473684210526315, + "grad_norm": 4.105552761757281e-06, + "learning_rate": 0.00016122105263157897, + "logits/chosen": 13.200133323669434, + "logits/rejected": 13.200133323669434, + "logps/chosen": -4880.45849609375, + "logps/rejected": -4880.45849609375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": -485.1986999511719, + "rewards/margins": 0.0, + "rewards/rejected": -485.1986999511719, + "step": 1850 + }, + { + "epoch": 19.473684210526315, + "eval_logits/chosen": 13.179985046386719, + "eval_logits/rejected": 13.179985046386719, + "eval_logps/chosen": -4311.1513671875, + "eval_logps/rejected": -4311.1513671875, + "eval_loss": 0.6931471824645996, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -428.21197509765625, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -428.21197509765625, + "eval_runtime": 4.3529, + "eval_samples_per_second": 2.297, + "eval_steps_per_second": 2.297, + "step": 1850 + } + ], + "logging_steps": 1, + "max_steps": 9500, + "num_input_tokens_seen": 0, + "num_train_epochs": 100, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}