| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9969230769230769, |
| "eval_steps": 500, |
| "global_step": 81, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.012307692307692308, |
| "grad_norm": 61.560791015625, |
| "learning_rate": 0.0, |
| "logits/chosen": 0.46728515625, |
| "logits/rejected": 1.0048828125, |
| "logps/chosen": -146.25, |
| "logps/rejected": -500.0, |
| "loss": 0.6864, |
| "rewards/accuracies": 0.203125, |
| "rewards/chosen": -0.007829666137695312, |
| "rewards/margins": 0.016693115234375, |
| "rewards/rejected": -0.024505615234375, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.024615384615384615, |
| "grad_norm": 55.07929611206055, |
| "learning_rate": 1.111111111111111e-07, |
| "logits/chosen": 0.46484375, |
| "logits/rejected": 0.919921875, |
| "logps/chosen": -125.25, |
| "logps/rejected": -433.0, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.1614583432674408, |
| "rewards/chosen": -0.0067844390869140625, |
| "rewards/margins": 0.005615234375, |
| "rewards/rejected": -0.01235198974609375, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.036923076923076927, |
| "grad_norm": 59.356468200683594, |
| "learning_rate": 2.222222222222222e-07, |
| "logits/chosen": 0.3984375, |
| "logits/rejected": 0.935546875, |
| "logps/chosen": -136.75, |
| "logps/rejected": -475.5, |
| "loss": 0.6886, |
| "rewards/accuracies": 0.1927083432674408, |
| "rewards/chosen": -0.00756072998046875, |
| "rewards/margins": 0.01003265380859375, |
| "rewards/rejected": -0.0175628662109375, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.04923076923076923, |
| "grad_norm": 56.093135833740234, |
| "learning_rate": 3.333333333333333e-07, |
| "logits/chosen": 0.52490234375, |
| "logits/rejected": 0.94921875, |
| "logps/chosen": -137.375, |
| "logps/rejected": -458.0, |
| "loss": 0.6821, |
| "rewards/accuracies": 0.1848958432674408, |
| "rewards/chosen": -0.0155181884765625, |
| "rewards/margins": 0.0217437744140625, |
| "rewards/rejected": -0.037322998046875, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.06153846153846154, |
| "grad_norm": 58.384185791015625, |
| "learning_rate": 4.444444444444444e-07, |
| "logits/chosen": 0.455078125, |
| "logits/rejected": 0.880859375, |
| "logps/chosen": -140.375, |
| "logps/rejected": -465.5, |
| "loss": 0.701, |
| "rewards/accuracies": 0.0963541716337204, |
| "rewards/chosen": -0.00130462646484375, |
| "rewards/margins": -0.01213836669921875, |
| "rewards/rejected": 0.010816574096679688, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.07384615384615385, |
| "grad_norm": 56.41188049316406, |
| "learning_rate": 5.555555555555555e-07, |
| "logits/chosen": 0.4716796875, |
| "logits/rejected": 0.990234375, |
| "logps/chosen": -143.125, |
| "logps/rejected": -462.5, |
| "loss": 0.687, |
| "rewards/accuracies": 0.1770833432674408, |
| "rewards/chosen": -0.0169525146484375, |
| "rewards/margins": 0.013661384582519531, |
| "rewards/rejected": -0.0306549072265625, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.08615384615384615, |
| "grad_norm": 51.86308670043945, |
| "learning_rate": 6.666666666666666e-07, |
| "logits/chosen": 0.6162109375, |
| "logits/rejected": 0.9208984375, |
| "logps/chosen": -148.75, |
| "logps/rejected": -453.0, |
| "loss": 0.6873, |
| "rewards/accuracies": 0.1979166716337204, |
| "rewards/chosen": -0.008083343505859375, |
| "rewards/margins": 0.0156707763671875, |
| "rewards/rejected": -0.023693084716796875, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.09846153846153846, |
| "grad_norm": 53.71965789794922, |
| "learning_rate": 7.777777777777778e-07, |
| "logits/chosen": 0.431640625, |
| "logits/rejected": 0.93359375, |
| "logps/chosen": -155.25, |
| "logps/rejected": -469.0, |
| "loss": 0.6751, |
| "rewards/accuracies": 0.2526041865348816, |
| "rewards/chosen": -0.02423095703125, |
| "rewards/margins": 0.0421142578125, |
| "rewards/rejected": -0.06634521484375, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.11076923076923077, |
| "grad_norm": 48.32746124267578, |
| "learning_rate": 8.888888888888888e-07, |
| "logits/chosen": 0.46923828125, |
| "logits/rejected": 0.982421875, |
| "logps/chosen": -156.875, |
| "logps/rejected": -459.5, |
| "loss": 0.6642, |
| "rewards/accuracies": 0.3098958432674408, |
| "rewards/chosen": -0.03485107421875, |
| "rewards/margins": 0.06591796875, |
| "rewards/rejected": -0.100830078125, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.12307692307692308, |
| "grad_norm": 43.558956146240234, |
| "learning_rate": 1e-06, |
| "logits/chosen": 0.41650390625, |
| "logits/rejected": 0.912109375, |
| "logps/chosen": -156.5, |
| "logps/rejected": -461.5, |
| "loss": 0.6053, |
| "rewards/accuracies": 0.5963541865348816, |
| "rewards/chosen": -0.1070556640625, |
| "rewards/margins": 0.19580078125, |
| "rewards/rejected": -0.302734375, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13538461538461538, |
| "grad_norm": 42.5031852722168, |
| "learning_rate": 9.99571699711836e-07, |
| "logits/chosen": 0.4072265625, |
| "logits/rejected": 0.921875, |
| "logps/chosen": -135.375, |
| "logps/rejected": -447.5, |
| "loss": 0.603, |
| "rewards/accuracies": 0.5651041865348816, |
| "rewards/chosen": -0.11669921875, |
| "rewards/margins": 0.20654296875, |
| "rewards/rejected": -0.32373046875, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.1476923076923077, |
| "grad_norm": 44.24930953979492, |
| "learning_rate": 9.982876141412855e-07, |
| "logits/chosen": 0.42626953125, |
| "logits/rejected": 0.966796875, |
| "logps/chosen": -160.75, |
| "logps/rejected": -475.0, |
| "loss": 0.5923, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -0.1500244140625, |
| "rewards/margins": 0.234619140625, |
| "rewards/rejected": -0.384765625, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 45.38541793823242, |
| "learning_rate": 9.961501876182148e-07, |
| "logits/chosen": 0.418212890625, |
| "logits/rejected": 1.0390625, |
| "logps/chosen": -147.25, |
| "logps/rejected": -486.0, |
| "loss": 0.5736, |
| "rewards/accuracies": 0.6354166865348816, |
| "rewards/chosen": -0.154052734375, |
| "rewards/margins": 0.27734375, |
| "rewards/rejected": -0.43115234375, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.1723076923076923, |
| "grad_norm": 33.105194091796875, |
| "learning_rate": 9.931634888554935e-07, |
| "logits/chosen": 0.6103515625, |
| "logits/rejected": 0.962890625, |
| "logps/chosen": -157.0, |
| "logps/rejected": -471.0, |
| "loss": 0.4972, |
| "rewards/accuracies": 0.6536458730697632, |
| "rewards/chosen": -0.318359375, |
| "rewards/margins": 0.541015625, |
| "rewards/rejected": -0.8603515625, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.18461538461538463, |
| "grad_norm": 41.91411209106445, |
| "learning_rate": 9.8933320320397e-07, |
| "logits/chosen": 0.5654296875, |
| "logits/rejected": 1.0224609375, |
| "logps/chosen": -153.5, |
| "logps/rejected": -469.5, |
| "loss": 0.4838, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.28857421875, |
| "rewards/margins": 0.58642578125, |
| "rewards/rejected": -0.8740234375, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.19692307692307692, |
| "grad_norm": 43.794559478759766, |
| "learning_rate": 9.846666218300807e-07, |
| "logits/chosen": 0.61572265625, |
| "logits/rejected": 1.0078125, |
| "logps/chosen": -139.75, |
| "logps/rejected": -469.0, |
| "loss": 0.4657, |
| "rewards/accuracies": 0.6822917461395264, |
| "rewards/chosen": -0.27978515625, |
| "rewards/margins": 0.6474609375, |
| "rewards/rejected": -0.9267578125, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.20923076923076922, |
| "grad_norm": 42.468875885009766, |
| "learning_rate": 9.791726278367021e-07, |
| "logits/chosen": 0.58154296875, |
| "logits/rejected": 1.017578125, |
| "logps/chosen": -166.625, |
| "logps/rejected": -489.5, |
| "loss": 0.4613, |
| "rewards/accuracies": 0.6875000596046448, |
| "rewards/chosen": -0.33544921875, |
| "rewards/margins": 0.6884765625, |
| "rewards/rejected": -1.0234375, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.22153846153846155, |
| "grad_norm": 27.986238479614258, |
| "learning_rate": 9.728616793536587e-07, |
| "logits/chosen": 0.51513671875, |
| "logits/rejected": 0.9306640625, |
| "logps/chosen": -143.375, |
| "logps/rejected": -453.5, |
| "loss": 0.4488, |
| "rewards/accuracies": 0.6822916865348816, |
| "rewards/chosen": -0.316650390625, |
| "rewards/margins": 0.7265625, |
| "rewards/rejected": -1.04296875, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.23384615384615384, |
| "grad_norm": 27.39495086669922, |
| "learning_rate": 9.657457896300791e-07, |
| "logits/chosen": 0.45166015625, |
| "logits/rejected": 1.0146484375, |
| "logps/chosen": -153.625, |
| "logps/rejected": -483.5, |
| "loss": 0.4378, |
| "rewards/accuracies": 0.6953125596046448, |
| "rewards/chosen": -0.38671875, |
| "rewards/margins": 0.7861328125, |
| "rewards/rejected": -1.173828125, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.24615384615384617, |
| "grad_norm": 26.7589168548584, |
| "learning_rate": 9.578385041664925e-07, |
| "logits/chosen": 0.5009765625, |
| "logits/rejected": 0.9814453125, |
| "logps/chosen": -148.5, |
| "logps/rejected": -470.0, |
| "loss": 0.4428, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.42041015625, |
| "rewards/margins": 0.78515625, |
| "rewards/rejected": -1.205078125, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.25846153846153846, |
| "grad_norm": 27.149879455566406, |
| "learning_rate": 9.491548749301997e-07, |
| "logits/chosen": 0.541015625, |
| "logits/rejected": 1.08203125, |
| "logps/chosen": -154.75, |
| "logps/rejected": -497.5, |
| "loss": 0.4183, |
| "rewards/accuracies": 0.7135417461395264, |
| "rewards/chosen": -0.394287109375, |
| "rewards/margins": 0.8642578125, |
| "rewards/rejected": -1.259765625, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.27076923076923076, |
| "grad_norm": 18.02589988708496, |
| "learning_rate": 9.397114317029974e-07, |
| "logits/chosen": 0.5244140625, |
| "logits/rejected": 1.05859375, |
| "logps/chosen": -159.5, |
| "logps/rejected": -493.0, |
| "loss": 0.3616, |
| "rewards/accuracies": 0.7135416865348816, |
| "rewards/chosen": -0.5263671875, |
| "rewards/margins": 1.1875, |
| "rewards/rejected": -1.712890625, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.28307692307692306, |
| "grad_norm": 18.0925235748291, |
| "learning_rate": 9.295261506157985e-07, |
| "logits/chosen": 0.63671875, |
| "logits/rejected": 1.072265625, |
| "logps/chosen": -144.25, |
| "logps/rejected": -493.0, |
| "loss": 0.3126, |
| "rewards/accuracies": 0.7526041865348816, |
| "rewards/chosen": -0.5400390625, |
| "rewards/margins": 1.5, |
| "rewards/rejected": -2.0390625, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.2953846153846154, |
| "grad_norm": 14.509533882141113, |
| "learning_rate": 9.186184199300463e-07, |
| "logits/chosen": 0.4853515625, |
| "logits/rejected": 1.0283203125, |
| "logps/chosen": -148.5, |
| "logps/rejected": -472.5, |
| "loss": 0.3201, |
| "rewards/accuracies": 0.7083333730697632, |
| "rewards/chosen": -0.5908203125, |
| "rewards/margins": 1.5859375, |
| "rewards/rejected": -2.17578125, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.3076923076923077, |
| "grad_norm": 11.895376205444336, |
| "learning_rate": 9.070090031310558e-07, |
| "logits/chosen": 0.607421875, |
| "logits/rejected": 1.048828125, |
| "logps/chosen": -162.0, |
| "logps/rejected": -473.5, |
| "loss": 0.3188, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.6826171875, |
| "rewards/margins": 1.634765625, |
| "rewards/rejected": -2.31640625, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 11.410306930541992, |
| "learning_rate": 8.9471999940354e-07, |
| "logits/chosen": 0.468505859375, |
| "logits/rejected": 1.0244140625, |
| "logps/chosen": -143.625, |
| "logps/rejected": -472.5, |
| "loss": 0.2782, |
| "rewards/accuracies": 0.7447916865348816, |
| "rewards/chosen": -0.61376953125, |
| "rewards/margins": 1.9375, |
| "rewards/rejected": -2.546875, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.3323076923076923, |
| "grad_norm": 9.995100975036621, |
| "learning_rate": 8.817748015645558e-07, |
| "logits/chosen": 0.6416015625, |
| "logits/rejected": 1.0673828125, |
| "logps/chosen": -171.0, |
| "logps/rejected": -480.0, |
| "loss": 0.3054, |
| "rewards/accuracies": 0.7031250596046448, |
| "rewards/chosen": -0.794921875, |
| "rewards/margins": 1.8515625, |
| "rewards/rejected": -2.64453125, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.3446153846153846, |
| "grad_norm": 10.519892692565918, |
| "learning_rate": 8.681980515339463e-07, |
| "logits/chosen": 0.490234375, |
| "logits/rejected": 1.138671875, |
| "logps/chosen": -157.0, |
| "logps/rejected": -521.5, |
| "loss": 0.2719, |
| "rewards/accuracies": 0.7473958730697632, |
| "rewards/chosen": -0.8017578125, |
| "rewards/margins": 2.0703125, |
| "rewards/rejected": -2.87109375, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.3569230769230769, |
| "grad_norm": 8.758149147033691, |
| "learning_rate": 8.540155934270471e-07, |
| "logits/chosen": 0.5693359375, |
| "logits/rejected": 1.13671875, |
| "logps/chosen": -147.5, |
| "logps/rejected": -507.0, |
| "loss": 0.2637, |
| "rewards/accuracies": 0.7682291865348816, |
| "rewards/chosen": -0.7607421875, |
| "rewards/margins": 2.142578125, |
| "rewards/rejected": -2.90625, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.36923076923076925, |
| "grad_norm": 10.073951721191406, |
| "learning_rate": 8.392544243589427e-07, |
| "logits/chosen": 0.57568359375, |
| "logits/rejected": 1.0546875, |
| "logps/chosen": -142.625, |
| "logps/rejected": -466.5, |
| "loss": 0.2991, |
| "rewards/accuracies": 0.7161458730697632, |
| "rewards/chosen": -0.6943359375, |
| "rewards/margins": 1.986328125, |
| "rewards/rejected": -2.6796875, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.38153846153846155, |
| "grad_norm": 10.016172409057617, |
| "learning_rate": 8.239426430539243e-07, |
| "logits/chosen": 0.5673828125, |
| "logits/rejected": 1.103515625, |
| "logps/chosen": -181.5, |
| "logps/rejected": -497.5, |
| "loss": 0.2961, |
| "rewards/accuracies": 0.7239583730697632, |
| "rewards/chosen": -0.9736328125, |
| "rewards/margins": 2.001953125, |
| "rewards/rejected": -2.98046875, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.39384615384615385, |
| "grad_norm": 11.014229774475098, |
| "learning_rate": 8.081093963579707e-07, |
| "logits/chosen": 0.5009765625, |
| "logits/rejected": 1.044921875, |
| "logps/chosen": -142.875, |
| "logps/rejected": -502.5, |
| "loss": 0.2449, |
| "rewards/accuracies": 0.7682291865348816, |
| "rewards/chosen": -0.77734375, |
| "rewards/margins": 2.39453125, |
| "rewards/rejected": -3.17578125, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.40615384615384614, |
| "grad_norm": 8.632246971130371, |
| "learning_rate": 7.917848237560708e-07, |
| "logits/chosen": 0.7080078125, |
| "logits/rejected": 1.140625, |
| "logps/chosen": -171.5, |
| "logps/rejected": -491.5, |
| "loss": 0.2981, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.9248046875, |
| "rewards/margins": 2.107421875, |
| "rewards/rejected": -3.03125, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.41846153846153844, |
| "grad_norm": 7.112660884857178, |
| "learning_rate": 7.75e-07, |
| "logits/chosen": 0.7099609375, |
| "logits/rejected": 1.154296875, |
| "logps/chosen": -159.0, |
| "logps/rejected": -498.5, |
| "loss": 0.2759, |
| "rewards/accuracies": 0.7083333730697632, |
| "rewards/chosen": -0.888671875, |
| "rewards/margins": 2.30078125, |
| "rewards/rejected": -3.1875, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.4307692307692308, |
| "grad_norm": 7.740782737731934, |
| "learning_rate": 7.577868759557653e-07, |
| "logits/chosen": 0.6611328125, |
| "logits/rejected": 1.142578125, |
| "logps/chosen": -161.5, |
| "logps/rejected": -512.5, |
| "loss": 0.2697, |
| "rewards/accuracies": 0.7213541865348816, |
| "rewards/chosen": -0.9482421875, |
| "rewards/margins": 2.302734375, |
| "rewards/rejected": -3.25390625, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.4430769230769231, |
| "grad_norm": 7.065237522125244, |
| "learning_rate": 7.401782177833147e-07, |
| "logits/chosen": 0.720703125, |
| "logits/rejected": 1.166015625, |
| "logps/chosen": -152.0, |
| "logps/rejected": -479.5, |
| "loss": 0.2701, |
| "rewards/accuracies": 0.7291667461395264, |
| "rewards/chosen": -0.8896484375, |
| "rewards/margins": 2.322265625, |
| "rewards/rejected": -3.21484375, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.4553846153846154, |
| "grad_norm": 7.172698497772217, |
| "learning_rate": 7.222075445642904e-07, |
| "logits/chosen": 0.64892578125, |
| "logits/rejected": 1.173828125, |
| "logps/chosen": -170.75, |
| "logps/rejected": -525.0, |
| "loss": 0.2379, |
| "rewards/accuracies": 0.7682291865348816, |
| "rewards/chosen": -1.0400390625, |
| "rewards/margins": 2.55078125, |
| "rewards/rejected": -3.58984375, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.4676923076923077, |
| "grad_norm": 5.642609596252441, |
| "learning_rate": 7.039090644965509e-07, |
| "logits/chosen": 0.712890625, |
| "logits/rejected": 1.16796875, |
| "logps/chosen": -177.75, |
| "logps/rejected": -507.5, |
| "loss": 0.2766, |
| "rewards/accuracies": 0.6927083730697632, |
| "rewards/chosen": -1.0498046875, |
| "rewards/margins": 2.4140625, |
| "rewards/rejected": -3.46484375, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 6.549254894256592, |
| "learning_rate": 6.853176097769228e-07, |
| "logits/chosen": 0.65869140625, |
| "logits/rejected": 1.166015625, |
| "logps/chosen": -166.25, |
| "logps/rejected": -504.5, |
| "loss": 0.2501, |
| "rewards/accuracies": 0.7447917461395264, |
| "rewards/chosen": -1.03515625, |
| "rewards/margins": 2.59375, |
| "rewards/rejected": -3.62890625, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.49230769230769234, |
| "grad_norm": 5.026706695556641, |
| "learning_rate": 6.664685702961344e-07, |
| "logits/chosen": 0.68359375, |
| "logits/rejected": 1.19140625, |
| "logps/chosen": -127.125, |
| "logps/rejected": -517.5, |
| "loss": 0.2026, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.8837890625, |
| "rewards/margins": 3.1328125, |
| "rewards/rejected": -4.0234375, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5046153846153846, |
| "grad_norm": 4.1700663566589355, |
| "learning_rate": 6.473978262721463e-07, |
| "logits/chosen": 0.59033203125, |
| "logits/rejected": 1.244140625, |
| "logps/chosen": -153.25, |
| "logps/rejected": -532.0, |
| "loss": 0.1867, |
| "rewards/accuracies": 0.8046875596046448, |
| "rewards/chosen": -1.08984375, |
| "rewards/margins": 3.46484375, |
| "rewards/rejected": -4.5546875, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.5169230769230769, |
| "grad_norm": 3.9178483486175537, |
| "learning_rate": 6.281416799501187e-07, |
| "logits/chosen": 0.818359375, |
| "logits/rejected": 1.263671875, |
| "logps/chosen": -174.75, |
| "logps/rejected": -529.0, |
| "loss": 0.2116, |
| "rewards/accuracies": 0.7786458730697632, |
| "rewards/chosen": -1.330078125, |
| "rewards/margins": 3.5078125, |
| "rewards/rejected": -4.8359375, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.5292307692307693, |
| "grad_norm": 3.5173087120056152, |
| "learning_rate": 6.087367864990232e-07, |
| "logits/chosen": 0.7109375, |
| "logits/rejected": 1.23046875, |
| "logps/chosen": -156.25, |
| "logps/rejected": -506.0, |
| "loss": 0.2033, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -1.248046875, |
| "rewards/margins": 3.71875, |
| "rewards/rejected": -4.9609375, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.5415384615384615, |
| "grad_norm": 2.8550825119018555, |
| "learning_rate": 5.892200842364462e-07, |
| "logits/chosen": 0.681640625, |
| "logits/rejected": 1.26171875, |
| "logps/chosen": -144.25, |
| "logps/rejected": -520.0, |
| "loss": 0.21, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.22265625, |
| "rewards/margins": 4.05859375, |
| "rewards/rejected": -5.2890625, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.5538461538461539, |
| "grad_norm": 2.526937484741211, |
| "learning_rate": 5.696287243144012e-07, |
| "logits/chosen": 0.61083984375, |
| "logits/rejected": 1.2578125, |
| "logps/chosen": -157.375, |
| "logps/rejected": -501.5, |
| "loss": 0.2269, |
| "rewards/accuracies": 0.7213541865348816, |
| "rewards/chosen": -1.3583984375, |
| "rewards/margins": 4.03515625, |
| "rewards/rejected": -5.3984375, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.5661538461538461, |
| "grad_norm": 2.4299278259277344, |
| "learning_rate": 5.5e-07, |
| "logits/chosen": 0.80859375, |
| "logits/rejected": 1.255859375, |
| "logps/chosen": -173.5, |
| "logps/rejected": -535.0, |
| "loss": 0.2109, |
| "rewards/accuracies": 0.7500000596046448, |
| "rewards/chosen": -1.513671875, |
| "rewards/margins": 4.234375, |
| "rewards/rejected": -5.734375, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.5784615384615385, |
| "grad_norm": 2.101548671722412, |
| "learning_rate": 5.303712756855988e-07, |
| "logits/chosen": 0.8642578125, |
| "logits/rejected": 1.310546875, |
| "logps/chosen": -177.75, |
| "logps/rejected": -537.0, |
| "loss": 0.2036, |
| "rewards/accuracies": 0.7447916865348816, |
| "rewards/chosen": -1.63671875, |
| "rewards/margins": 4.3359375, |
| "rewards/rejected": -5.9609375, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.5907692307692308, |
| "grad_norm": 1.9781229496002197, |
| "learning_rate": 5.107799157635538e-07, |
| "logits/chosen": 0.6865234375, |
| "logits/rejected": 1.296875, |
| "logps/chosen": -149.125, |
| "logps/rejected": -538.0, |
| "loss": 0.1708, |
| "rewards/accuracies": 0.7916667461395264, |
| "rewards/chosen": -1.419921875, |
| "rewards/margins": 4.71875, |
| "rewards/rejected": -6.140625, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.6030769230769231, |
| "grad_norm": 2.068936586380005, |
| "learning_rate": 4.912632135009769e-07, |
| "logits/chosen": 0.7412109375, |
| "logits/rejected": 1.248046875, |
| "logps/chosen": -161.25, |
| "logps/rejected": -499.5, |
| "loss": 0.2203, |
| "rewards/accuracies": 0.7265625596046448, |
| "rewards/chosen": -1.529296875, |
| "rewards/margins": 4.203125, |
| "rewards/rejected": -5.7421875, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 1.7143200635910034, |
| "learning_rate": 4.7185832004988133e-07, |
| "logits/chosen": 0.8076171875, |
| "logits/rejected": 1.306640625, |
| "logps/chosen": -178.0, |
| "logps/rejected": -528.0, |
| "loss": 0.2094, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -1.7734375, |
| "rewards/margins": 4.44921875, |
| "rewards/rejected": -6.2265625, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.6276923076923077, |
| "grad_norm": 2.1289665699005127, |
| "learning_rate": 4.526021737278537e-07, |
| "logits/chosen": 0.68408203125, |
| "logits/rejected": 1.27734375, |
| "logps/chosen": -160.0, |
| "logps/rejected": -536.0, |
| "loss": 0.1996, |
| "rewards/accuracies": 0.7526041865348816, |
| "rewards/chosen": -1.62109375, |
| "rewards/margins": 4.875, |
| "rewards/rejected": -6.5078125, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.698723554611206, |
| "learning_rate": 4.3353142970386557e-07, |
| "logits/chosen": 0.775390625, |
| "logits/rejected": 1.36328125, |
| "logps/chosen": -164.25, |
| "logps/rejected": -532.5, |
| "loss": 0.2009, |
| "rewards/accuracies": 0.7473958730697632, |
| "rewards/chosen": -1.6796875, |
| "rewards/margins": 4.734375, |
| "rewards/rejected": -6.4140625, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.6523076923076923, |
| "grad_norm": 2.5585391521453857, |
| "learning_rate": 4.146823902230772e-07, |
| "logits/chosen": 0.6572265625, |
| "logits/rejected": 1.208984375, |
| "logps/chosen": -167.5, |
| "logps/rejected": -526.5, |
| "loss": 0.1879, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -1.744140625, |
| "rewards/margins": 4.7734375, |
| "rewards/rejected": -6.5234375, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.6646153846153846, |
| "grad_norm": 1.9787291288375854, |
| "learning_rate": 3.9609093550344907e-07, |
| "logits/chosen": 0.7802734375, |
| "logits/rejected": 1.302734375, |
| "logps/chosen": -163.25, |
| "logps/rejected": -518.0, |
| "loss": 0.2042, |
| "rewards/accuracies": 0.7369791865348816, |
| "rewards/chosen": -1.787109375, |
| "rewards/margins": 4.7578125, |
| "rewards/rejected": -6.5546875, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.676923076923077, |
| "grad_norm": 2.1319005489349365, |
| "learning_rate": 3.777924554357096e-07, |
| "logits/chosen": 0.63671875, |
| "logits/rejected": 1.25390625, |
| "logps/chosen": -141.5, |
| "logps/rejected": -521.5, |
| "loss": 0.1732, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.494140625, |
| "rewards/margins": 5.03125, |
| "rewards/rejected": -6.5234375, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.6892307692307692, |
| "grad_norm": 1.4944871664047241, |
| "learning_rate": 3.5982178221668533e-07, |
| "logits/chosen": 0.71484375, |
| "logits/rejected": 1.357421875, |
| "logps/chosen": -169.25, |
| "logps/rejected": -543.5, |
| "loss": 0.1837, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -1.796875, |
| "rewards/margins": 5.0234375, |
| "rewards/rejected": -6.8203125, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.7015384615384616, |
| "grad_norm": 1.6523363590240479, |
| "learning_rate": 3.4221312404423486e-07, |
| "logits/chosen": 0.861328125, |
| "logits/rejected": 1.2734375, |
| "logps/chosen": -170.75, |
| "logps/rejected": -543.0, |
| "loss": 0.1791, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.810546875, |
| "rewards/margins": 5.15625, |
| "rewards/rejected": -6.9609375, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.7138461538461538, |
| "grad_norm": 1.6872831583023071, |
| "learning_rate": 3.250000000000001e-07, |
| "logits/chosen": 0.7626953125, |
| "logits/rejected": 1.36328125, |
| "logps/chosen": -162.5, |
| "logps/rejected": -554.0, |
| "loss": 0.17, |
| "rewards/accuracies": 0.7838542461395264, |
| "rewards/chosen": -1.70703125, |
| "rewards/margins": 5.390625, |
| "rewards/rejected": -7.1015625, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.7261538461538461, |
| "grad_norm": 1.9852259159088135, |
| "learning_rate": 3.0821517624392925e-07, |
| "logits/chosen": 0.66455078125, |
| "logits/rejected": 1.298828125, |
| "logps/chosen": -155.75, |
| "logps/rejected": -523.0, |
| "loss": 0.1743, |
| "rewards/accuracies": 0.7786458730697632, |
| "rewards/chosen": -1.720703125, |
| "rewards/margins": 5.1484375, |
| "rewards/rejected": -6.875, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.7384615384615385, |
| "grad_norm": 1.980146050453186, |
| "learning_rate": 2.918906036420294e-07, |
| "logits/chosen": 0.705078125, |
| "logits/rejected": 1.30859375, |
| "logps/chosen": -146.5, |
| "logps/rejected": -525.5, |
| "loss": 0.1735, |
| "rewards/accuracies": 0.7942708730697632, |
| "rewards/chosen": -1.58203125, |
| "rewards/margins": 5.171875, |
| "rewards/rejected": -6.7578125, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.7507692307692307, |
| "grad_norm": 1.482957363128662, |
| "learning_rate": 2.760573569460757e-07, |
| "logits/chosen": 0.7333984375, |
| "logits/rejected": 1.376953125, |
| "logps/chosen": -164.25, |
| "logps/rejected": -546.5, |
| "loss": 0.1803, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -1.779296875, |
| "rewards/margins": 5.3046875, |
| "rewards/rejected": -7.09375, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.7630769230769231, |
| "grad_norm": 2.1725311279296875, |
| "learning_rate": 2.6074557564105724e-07, |
| "logits/chosen": 0.716796875, |
| "logits/rejected": 1.27734375, |
| "logps/chosen": -173.0, |
| "logps/rejected": -543.0, |
| "loss": 0.1792, |
| "rewards/accuracies": 0.7838542461395264, |
| "rewards/chosen": -1.921875, |
| "rewards/margins": 5.0390625, |
| "rewards/rejected": -6.96875, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.7753846153846153, |
| "grad_norm": 1.3990447521209717, |
| "learning_rate": 2.4598440657295286e-07, |
| "logits/chosen": 0.7734375, |
| "logits/rejected": 1.296875, |
| "logps/chosen": -160.5, |
| "logps/rejected": -497.5, |
| "loss": 0.2135, |
| "rewards/accuracies": 0.7161458730697632, |
| "rewards/chosen": -1.71484375, |
| "rewards/margins": 4.796875, |
| "rewards/rejected": -6.515625, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.7876923076923077, |
| "grad_norm": 1.7064610719680786, |
| "learning_rate": 2.3180194846605364e-07, |
| "logits/chosen": 0.8671875, |
| "logits/rejected": 1.419921875, |
| "logps/chosen": -174.25, |
| "logps/rejected": -543.0, |
| "loss": 0.1836, |
| "rewards/accuracies": 0.7708333730697632, |
| "rewards/chosen": -1.900390625, |
| "rewards/margins": 5.0703125, |
| "rewards/rejected": -6.984375, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.7583798170089722, |
| "learning_rate": 2.1822519843544422e-07, |
| "logits/chosen": 0.802734375, |
| "logits/rejected": 1.3515625, |
| "logps/chosen": -169.75, |
| "logps/rejected": -529.0, |
| "loss": 0.1883, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -1.83203125, |
| "rewards/margins": 4.96875, |
| "rewards/rejected": -6.8046875, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.8123076923076923, |
| "grad_norm": 1.5734552145004272, |
| "learning_rate": 2.0528000059645995e-07, |
| "logits/chosen": 0.6650390625, |
| "logits/rejected": 1.326171875, |
| "logps/chosen": -141.375, |
| "logps/rejected": -530.0, |
| "loss": 0.1778, |
| "rewards/accuracies": 0.7760416865348816, |
| "rewards/chosen": -1.548828125, |
| "rewards/margins": 5.390625, |
| "rewards/rejected": -6.9296875, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.8246153846153846, |
| "grad_norm": 1.7226340770721436, |
| "learning_rate": 1.929909968689442e-07, |
| "logits/chosen": 0.9169921875, |
| "logits/rejected": 1.41796875, |
| "logps/chosen": -159.5, |
| "logps/rejected": -536.0, |
| "loss": 0.2036, |
| "rewards/accuracies": 0.7369792461395264, |
| "rewards/chosen": -1.787109375, |
| "rewards/margins": 5.2890625, |
| "rewards/rejected": -7.0703125, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.8369230769230769, |
| "grad_norm": 2.157374143600464, |
| "learning_rate": 1.8138158006995363e-07, |
| "logits/chosen": 0.685546875, |
| "logits/rejected": 1.357421875, |
| "logps/chosen": -155.25, |
| "logps/rejected": -510.0, |
| "loss": 0.2231, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -1.68359375, |
| "rewards/margins": 4.96875, |
| "rewards/rejected": -6.65625, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.8492307692307692, |
| "grad_norm": 1.4800869226455688, |
| "learning_rate": 1.704738493842015e-07, |
| "logits/chosen": 0.796875, |
| "logits/rejected": 1.359375, |
| "logps/chosen": -149.125, |
| "logps/rejected": -525.5, |
| "loss": 0.1814, |
| "rewards/accuracies": 0.7682291865348816, |
| "rewards/chosen": -1.626953125, |
| "rewards/margins": 5.203125, |
| "rewards/rejected": -6.84375, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.8615384615384616, |
| "grad_norm": 1.577478051185608, |
| "learning_rate": 1.6028856829700258e-07, |
| "logits/chosen": 0.81640625, |
| "logits/rejected": 1.396484375, |
| "logps/chosen": -154.5, |
| "logps/rejected": -548.0, |
| "loss": 0.1663, |
| "rewards/accuracies": 0.7864583730697632, |
| "rewards/chosen": -1.65625, |
| "rewards/margins": 5.546875, |
| "rewards/rejected": -7.203125, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.8738461538461538, |
| "grad_norm": 1.6908918619155884, |
| "learning_rate": 1.5084512506980023e-07, |
| "logits/chosen": 0.7607421875, |
| "logits/rejected": 1.337890625, |
| "logps/chosen": -168.25, |
| "logps/rejected": -561.0, |
| "loss": 0.1641, |
| "rewards/accuracies": 0.7890625596046448, |
| "rewards/chosen": -1.83203125, |
| "rewards/margins": 5.578125, |
| "rewards/rejected": -7.4140625, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.8861538461538462, |
| "grad_norm": 1.6061334609985352, |
| "learning_rate": 1.4216149583350755e-07, |
| "logits/chosen": 0.7919921875, |
| "logits/rejected": 1.365234375, |
| "logps/chosen": -165.75, |
| "logps/rejected": -554.0, |
| "loss": 0.1649, |
| "rewards/accuracies": 0.7942708730697632, |
| "rewards/chosen": -1.82421875, |
| "rewards/margins": 5.546875, |
| "rewards/rejected": -7.3671875, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.8984615384615384, |
| "grad_norm": 1.3449442386627197, |
| "learning_rate": 1.3425421036992097e-07, |
| "logits/chosen": 0.6474609375, |
| "logits/rejected": 1.310546875, |
| "logps/chosen": -169.0, |
| "logps/rejected": -526.5, |
| "loss": 0.1973, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -1.79296875, |
| "rewards/margins": 5.03125, |
| "rewards/rejected": -6.8359375, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.9107692307692308, |
| "grad_norm": 1.48768150806427, |
| "learning_rate": 1.2713832064634125e-07, |
| "logits/chosen": 0.728515625, |
| "logits/rejected": 1.33984375, |
| "logps/chosen": -157.5, |
| "logps/rejected": -553.0, |
| "loss": 0.1701, |
| "rewards/accuracies": 0.7864583730697632, |
| "rewards/chosen": -1.78125, |
| "rewards/margins": 5.609375, |
| "rewards/rejected": -7.390625, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 1.579057216644287, |
| "learning_rate": 1.2082737216329792e-07, |
| "logits/chosen": 0.7705078125, |
| "logits/rejected": 1.341796875, |
| "logps/chosen": -167.75, |
| "logps/rejected": -554.0, |
| "loss": 0.1614, |
| "rewards/accuracies": 0.7942708730697632, |
| "rewards/chosen": -1.791015625, |
| "rewards/margins": 5.46875, |
| "rewards/rejected": -7.25, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.9353846153846154, |
| "grad_norm": 1.7830864191055298, |
| "learning_rate": 1.1533337816991931e-07, |
| "logits/chosen": 0.767578125, |
| "logits/rejected": 1.3515625, |
| "logps/chosen": -165.5, |
| "logps/rejected": -548.5, |
| "loss": 0.1884, |
| "rewards/accuracies": 0.7552083730697632, |
| "rewards/chosen": -1.814453125, |
| "rewards/margins": 5.453125, |
| "rewards/rejected": -7.265625, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.9476923076923077, |
| "grad_norm": 2.3972299098968506, |
| "learning_rate": 1.1066679679602998e-07, |
| "logits/chosen": 0.7197265625, |
| "logits/rejected": 1.322265625, |
| "logps/chosen": -154.5, |
| "logps/rejected": -531.5, |
| "loss": 0.1927, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -1.615234375, |
| "rewards/margins": 5.3984375, |
| "rewards/rejected": -7.0078125, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.6976590156555176, |
| "learning_rate": 1.068365111445064e-07, |
| "logits/chosen": 0.80078125, |
| "logits/rejected": 1.349609375, |
| "logps/chosen": -167.125, |
| "logps/rejected": -545.5, |
| "loss": 0.1756, |
| "rewards/accuracies": 0.7734375596046448, |
| "rewards/chosen": -1.791015625, |
| "rewards/margins": 5.3828125, |
| "rewards/rejected": -7.1875, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.9723076923076923, |
| "grad_norm": 1.3769242763519287, |
| "learning_rate": 1.0384981238178533e-07, |
| "logits/chosen": 0.779296875, |
| "logits/rejected": 1.40234375, |
| "logps/chosen": -174.375, |
| "logps/rejected": -547.5, |
| "loss": 0.1883, |
| "rewards/accuracies": 0.7630208730697632, |
| "rewards/chosen": -1.923828125, |
| "rewards/margins": 5.3828125, |
| "rewards/rejected": -7.296875, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.9846153846153847, |
| "grad_norm": 2.270617961883545, |
| "learning_rate": 1.017123858587145e-07, |
| "logits/chosen": 0.869140625, |
| "logits/rejected": 1.3125, |
| "logps/chosen": -171.875, |
| "logps/rejected": -499.5, |
| "loss": 0.2301, |
| "rewards/accuracies": 0.7239583730697632, |
| "rewards/chosen": -1.837890625, |
| "rewards/margins": 4.72265625, |
| "rewards/rejected": -6.5703125, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.9969230769230769, |
| "grad_norm": 1.290389060974121, |
| "learning_rate": 1.0042830028816399e-07, |
| "logits/chosen": 0.8828125, |
| "logits/rejected": 1.416015625, |
| "logps/chosen": -194.0, |
| "logps/rejected": -567.0, |
| "loss": 0.1782, |
| "rewards/accuracies": 0.7708333730697632, |
| "rewards/chosen": -2.130859375, |
| "rewards/margins": 5.390625, |
| "rewards/rejected": -7.5234375, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.9969230769230769, |
| "step": 81, |
| "total_flos": 0.0, |
| "train_loss": 0.31301942670050964, |
| "train_runtime": 1893.6704, |
| "train_samples_per_second": 16.472, |
| "train_steps_per_second": 0.043 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 81, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|