| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 468, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02564102564102564, |
| "grad_norm": 1.8807990550994873, |
| "learning_rate": 2e-07, |
| "logits/chosen": 0.13671875, |
| "logits/rejected": 0.1728515625, |
| "logps/chosen": -109.5, |
| "logps/rejected": -122.5, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.0416666679084301, |
| "rewards/chosen": 0.00104522705078125, |
| "rewards/margins": -0.0031280517578125, |
| "rewards/rejected": 0.004180908203125, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.05128205128205128, |
| "grad_norm": 1.8764904737472534, |
| "learning_rate": 4e-07, |
| "logits/chosen": 0.29296875, |
| "logits/rejected": 0.345703125, |
| "logps/chosen": -106.0, |
| "logps/rejected": -128.0, |
| "loss": 0.6943, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.000518798828125, |
| "rewards/margins": -0.007049560546875, |
| "rewards/rejected": 0.006500244140625, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.07692307692307693, |
| "grad_norm": 1.893288493156433, |
| "learning_rate": 6e-07, |
| "logits/chosen": 0.1279296875, |
| "logits/rejected": 0.1923828125, |
| "logps/chosen": -99.0, |
| "logps/rejected": -110.0, |
| "loss": 0.6906, |
| "rewards/accuracies": 0.2916666567325592, |
| "rewards/chosen": 0.0036468505859375, |
| "rewards/margins": 0.00494384765625, |
| "rewards/rejected": -0.0012969970703125, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.10256410256410256, |
| "grad_norm": 1.7804158926010132, |
| "learning_rate": 8e-07, |
| "logits/chosen": 0.28515625, |
| "logits/rejected": 0.302734375, |
| "logps/chosen": -119.0, |
| "logps/rejected": -128.0, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.1666666716337204, |
| "rewards/chosen": -0.0078125, |
| "rewards/margins": -0.01214599609375, |
| "rewards/rejected": 0.004302978515625, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.1282051282051282, |
| "grad_norm": 1.8483916521072388, |
| "learning_rate": 1e-06, |
| "logits/chosen": 0.2392578125, |
| "logits/rejected": 0.2021484375, |
| "logps/chosen": -99.5, |
| "logps/rejected": -119.5, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": 0.0026092529296875, |
| "rewards/margins": -0.0130615234375, |
| "rewards/rejected": 0.015625, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.15384615384615385, |
| "grad_norm": 1.9503748416900635, |
| "learning_rate": 1.2e-06, |
| "logits/chosen": 0.197265625, |
| "logits/rejected": 0.1669921875, |
| "logps/chosen": -122.0, |
| "logps/rejected": -141.0, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.2916666567325592, |
| "rewards/chosen": 0.00653076171875, |
| "rewards/margins": 0.01171875, |
| "rewards/rejected": -0.005218505859375, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.1794871794871795, |
| "grad_norm": 1.7854230403900146, |
| "learning_rate": 1.4e-06, |
| "logits/chosen": 0.208984375, |
| "logits/rejected": 0.25, |
| "logps/chosen": -106.0, |
| "logps/rejected": -108.5, |
| "loss": 0.6872, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.00182342529296875, |
| "rewards/margins": 0.01092529296875, |
| "rewards/rejected": -0.01275634765625, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.20512820512820512, |
| "grad_norm": 2.020820379257202, |
| "learning_rate": 1.6e-06, |
| "logits/chosen": 0.294921875, |
| "logits/rejected": 0.345703125, |
| "logps/chosen": -120.0, |
| "logps/rejected": -123.0, |
| "loss": 0.6888, |
| "rewards/accuracies": 0.3333333432674408, |
| "rewards/chosen": -0.007171630859375, |
| "rewards/margins": -0.00194549560546875, |
| "rewards/rejected": -0.00518798828125, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.23076923076923078, |
| "grad_norm": 2.045447826385498, |
| "learning_rate": 1.8e-06, |
| "logits/chosen": 0.20703125, |
| "logits/rejected": 0.271484375, |
| "logps/chosen": -113.0, |
| "logps/rejected": -126.0, |
| "loss": 0.6945, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.00885009765625, |
| "rewards/margins": -0.00469970703125, |
| "rewards/rejected": -0.004180908203125, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "grad_norm": 2.0050930976867676, |
| "learning_rate": 2e-06, |
| "logits/chosen": 0.19921875, |
| "logits/rejected": 0.27734375, |
| "logps/chosen": -122.5, |
| "logps/rejected": -145.0, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.2083333283662796, |
| "rewards/chosen": -0.004180908203125, |
| "rewards/margins": -0.00311279296875, |
| "rewards/rejected": -0.00103759765625, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.28205128205128205, |
| "grad_norm": 2.0118536949157715, |
| "learning_rate": 1.9998558164028463e-06, |
| "logits/chosen": 0.1953125, |
| "logits/rejected": 0.1748046875, |
| "logps/chosen": -124.0, |
| "logps/rejected": -143.0, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.2916666567325592, |
| "rewards/chosen": -0.0020904541015625, |
| "rewards/margins": -0.004058837890625, |
| "rewards/rejected": 0.0019683837890625, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.3076923076923077, |
| "grad_norm": 1.921610951423645, |
| "learning_rate": 1.9994233071892054e-06, |
| "logits/chosen": 0.142578125, |
| "logits/rejected": 0.1748046875, |
| "logps/chosen": -100.5, |
| "logps/rejected": -126.0, |
| "loss": 0.6862, |
| "rewards/accuracies": 0.4166666567325592, |
| "rewards/chosen": 0.0059814453125, |
| "rewards/margins": 0.0289306640625, |
| "rewards/rejected": -0.02294921875, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 1.9476183652877808, |
| "learning_rate": 1.998702597080545e-06, |
| "logits/chosen": 0.1533203125, |
| "logits/rejected": 0.1689453125, |
| "logps/chosen": -134.0, |
| "logps/rejected": -134.0, |
| "loss": 0.687, |
| "rewards/accuracies": 0.2916666567325592, |
| "rewards/chosen": -0.0093994140625, |
| "rewards/margins": 0.0093994140625, |
| "rewards/rejected": -0.018798828125, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.358974358974359, |
| "grad_norm": 2.0101871490478516, |
| "learning_rate": 1.997693893906017e-06, |
| "logits/chosen": 0.314453125, |
| "logits/rejected": 0.30078125, |
| "logps/chosen": -112.0, |
| "logps/rejected": -112.0, |
| "loss": 0.6868, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.0106201171875, |
| "rewards/margins": 0.0013427734375, |
| "rewards/rejected": -0.01202392578125, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.38461538461538464, |
| "grad_norm": 1.790059208869934, |
| "learning_rate": 1.996397488542526e-06, |
| "logits/chosen": 0.189453125, |
| "logits/rejected": 0.251953125, |
| "logps/chosen": -128.0, |
| "logps/rejected": -140.0, |
| "loss": 0.6833, |
| "rewards/accuracies": 0.4583333432674408, |
| "rewards/chosen": -0.0096435546875, |
| "rewards/margins": 0.015380859375, |
| "rewards/rejected": -0.0250244140625, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.41025641025641024, |
| "grad_norm": 1.9502273797988892, |
| "learning_rate": 1.99481375483085e-06, |
| "logits/chosen": 0.267578125, |
| "logits/rejected": 0.2578125, |
| "logps/chosen": -105.5, |
| "logps/rejected": -108.0, |
| "loss": 0.6838, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.01458740234375, |
| "rewards/margins": 0.01141357421875, |
| "rewards/rejected": -0.026123046875, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.4358974358974359, |
| "grad_norm": 2.0794284343719482, |
| "learning_rate": 1.992943149467835e-06, |
| "logits/chosen": 0.10986328125, |
| "logits/rejected": 0.205078125, |
| "logps/chosen": -121.0, |
| "logps/rejected": -128.0, |
| "loss": 0.6795, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0291748046875, |
| "rewards/rejected": -0.029296875, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.46153846153846156, |
| "grad_norm": 1.9881861209869385, |
| "learning_rate": 1.9907862118747023e-06, |
| "logits/chosen": 0.1796875, |
| "logits/rejected": 0.1865234375, |
| "logps/chosen": -130.0, |
| "logps/rejected": -132.0, |
| "loss": 0.6852, |
| "rewards/accuracies": 0.4166666567325592, |
| "rewards/chosen": -0.0022125244140625, |
| "rewards/margins": 0.0228271484375, |
| "rewards/rejected": -0.0250244140625, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.48717948717948717, |
| "grad_norm": 1.941139817237854, |
| "learning_rate": 1.988343564041492e-06, |
| "logits/chosen": 0.2080078125, |
| "logits/rejected": 0.24609375, |
| "logps/chosen": -116.0, |
| "logps/rejected": -131.0, |
| "loss": 0.6751, |
| "rewards/accuracies": 0.5833333134651184, |
| "rewards/chosen": 0.023681640625, |
| "rewards/margins": 0.0615234375, |
| "rewards/rejected": -0.03759765625, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.5128205128205128, |
| "grad_norm": 1.9498776197433472, |
| "learning_rate": 1.9856159103477083e-06, |
| "logits/chosen": 0.2041015625, |
| "logits/rejected": 0.193359375, |
| "logps/chosen": -105.5, |
| "logps/rejected": -110.0, |
| "loss": 0.6815, |
| "rewards/accuracies": 0.4583333432674408, |
| "rewards/chosen": -0.006744384765625, |
| "rewards/margins": 0.01123046875, |
| "rewards/rejected": -0.0179443359375, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.5384615384615384, |
| "grad_norm": 2.001382827758789, |
| "learning_rate": 1.9826040373591932e-06, |
| "logits/chosen": 0.25, |
| "logits/rejected": 0.27734375, |
| "logps/chosen": -91.0, |
| "logps/rejected": -98.5, |
| "loss": 0.6746, |
| "rewards/accuracies": 0.5416666865348816, |
| "rewards/chosen": 0.01214599609375, |
| "rewards/margins": 0.03857421875, |
| "rewards/rejected": -0.0263671875, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.5641025641025641, |
| "grad_norm": 2.089869737625122, |
| "learning_rate": 1.97930881360131e-06, |
| "logits/chosen": 0.1513671875, |
| "logits/rejected": 0.1220703125, |
| "logps/chosen": -126.5, |
| "logps/rejected": -130.0, |
| "loss": 0.6694, |
| "rewards/accuracies": 0.5416666865348816, |
| "rewards/chosen": -0.00156402587890625, |
| "rewards/margins": 0.048583984375, |
| "rewards/rejected": -0.050048828125, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.5897435897435898, |
| "grad_norm": 1.9758906364440918, |
| "learning_rate": 1.9757311893084885e-06, |
| "logits/chosen": 0.298828125, |
| "logits/rejected": 0.203125, |
| "logps/chosen": -102.5, |
| "logps/rejected": -115.0, |
| "loss": 0.6667, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.027099609375, |
| "rewards/margins": 0.0250244140625, |
| "rewards/rejected": -0.05224609375, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 1.9816081523895264, |
| "learning_rate": 1.971872196150208e-06, |
| "logits/chosen": 0.259765625, |
| "logits/rejected": 0.328125, |
| "logps/chosen": -87.5, |
| "logps/rejected": -97.0, |
| "loss": 0.6711, |
| "rewards/accuracies": 0.5416666865348816, |
| "rewards/chosen": -0.00885009765625, |
| "rewards/margins": 0.035400390625, |
| "rewards/rejected": -0.04443359375, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.6410256410256411, |
| "grad_norm": 1.8845876455307007, |
| "learning_rate": 1.967732946933499e-06, |
| "logits/chosen": 0.2373046875, |
| "logits/rejected": 0.23828125, |
| "logps/chosen": -117.0, |
| "logps/rejected": -110.5, |
| "loss": 0.6676, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0025787353515625, |
| "rewards/margins": 0.057373046875, |
| "rewards/rejected": -0.06005859375, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 1.8938732147216797, |
| "learning_rate": 1.963314635282044e-06, |
| "logits/chosen": 0.263671875, |
| "logits/rejected": 0.2294921875, |
| "logps/chosen": -108.5, |
| "logps/rejected": -120.5, |
| "loss": 0.6626, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.01300048828125, |
| "rewards/margins": 0.0634765625, |
| "rewards/rejected": -0.07666015625, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.6923076923076923, |
| "grad_norm": 1.905861258506775, |
| "learning_rate": 1.9586185352919775e-06, |
| "logits/chosen": 0.181640625, |
| "logits/rejected": 0.275390625, |
| "logps/chosen": -113.5, |
| "logps/rejected": -114.0, |
| "loss": 0.6603, |
| "rewards/accuracies": 0.4583333432674408, |
| "rewards/chosen": 0.00830078125, |
| "rewards/margins": 0.06103515625, |
| "rewards/rejected": -0.052734375, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.717948717948718, |
| "grad_norm": 2.1271400451660156, |
| "learning_rate": 1.9536460011644787e-06, |
| "logits/chosen": 0.208984375, |
| "logits/rejected": 0.2373046875, |
| "logps/chosen": -112.0, |
| "logps/rejected": -122.0, |
| "loss": 0.6602, |
| "rewards/accuracies": 0.5416666865348816, |
| "rewards/chosen": -0.007568359375, |
| "rewards/margins": 0.036865234375, |
| "rewards/rejected": -0.04443359375, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.7435897435897436, |
| "grad_norm": 1.9534716606140137, |
| "learning_rate": 1.9483984668152616e-06, |
| "logits/chosen": 0.1650390625, |
| "logits/rejected": 0.240234375, |
| "logps/chosen": -110.0, |
| "logps/rejected": -102.0, |
| "loss": 0.6654, |
| "rewards/accuracies": 0.5833333134651184, |
| "rewards/chosen": -0.0137939453125, |
| "rewards/margins": 0.043212890625, |
| "rewards/rejected": -0.05712890625, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 2.09041166305542, |
| "learning_rate": 1.942877445461084e-06, |
| "logits/chosen": 0.1435546875, |
| "logits/rejected": 0.1845703125, |
| "logps/chosen": -128.0, |
| "logps/rejected": -136.0, |
| "loss": 0.6473, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 0.000507354736328125, |
| "rewards/margins": 0.09765625, |
| "rewards/rejected": -0.09716796875, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.7948717948717948, |
| "grad_norm": 2.079911708831787, |
| "learning_rate": 1.9370845291833836e-06, |
| "logits/chosen": 0.1572265625, |
| "logits/rejected": 0.2158203125, |
| "logps/chosen": -109.0, |
| "logps/rejected": -123.0, |
| "loss": 0.6346, |
| "rewards/accuracies": 0.7083333134651184, |
| "rewards/chosen": -0.0223388671875, |
| "rewards/margins": 0.109375, |
| "rewards/rejected": -0.1318359375, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.8205128205128205, |
| "grad_norm": 1.9027904272079468, |
| "learning_rate": 1.9310213884691736e-06, |
| "logits/chosen": 0.130859375, |
| "logits/rejected": 0.19140625, |
| "logps/chosen": -135.0, |
| "logps/rejected": -147.0, |
| "loss": 0.6463, |
| "rewards/accuracies": 0.5416666865348816, |
| "rewards/chosen": -0.03271484375, |
| "rewards/margins": 0.11474609375, |
| "rewards/rejected": -0.1474609375, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.8461538461538461, |
| "grad_norm": 2.1986587047576904, |
| "learning_rate": 1.924689771729331e-06, |
| "logits/chosen": 0.224609375, |
| "logits/rejected": 0.1943359375, |
| "logps/chosen": -96.5, |
| "logps/rejected": -108.0, |
| "loss": 0.634, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0361328125, |
| "rewards/margins": 0.07275390625, |
| "rewards/rejected": -0.10888671875, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.8717948717948718, |
| "grad_norm": 1.9795209169387817, |
| "learning_rate": 1.918091504794411e-06, |
| "logits/chosen": 0.251953125, |
| "logits/rejected": 0.2177734375, |
| "logps/chosen": -125.5, |
| "logps/rejected": -132.0, |
| "loss": 0.6447, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0240478515625, |
| "rewards/margins": 0.1845703125, |
| "rewards/rejected": -0.208984375, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.8974358974358975, |
| "grad_norm": 2.002471923828125, |
| "learning_rate": 1.9112284903881357e-06, |
| "logits/chosen": 0.228515625, |
| "logits/rejected": 0.25, |
| "logps/chosen": -105.5, |
| "logps/rejected": -117.0, |
| "loss": 0.6333, |
| "rewards/accuracies": 0.7083333134651184, |
| "rewards/chosen": -0.0177001953125, |
| "rewards/margins": 0.126953125, |
| "rewards/rejected": -0.14453125, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 1.976642370223999, |
| "learning_rate": 1.9041027075787146e-06, |
| "logits/chosen": 0.216796875, |
| "logits/rejected": 0.185546875, |
| "logps/chosen": -111.0, |
| "logps/rejected": -133.0, |
| "loss": 0.6283, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.06884765625, |
| "rewards/margins": 0.1318359375, |
| "rewards/rejected": -0.201171875, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.9487179487179487, |
| "grad_norm": 2.043111562728882, |
| "learning_rate": 1.8967162112081435e-06, |
| "logits/chosen": 0.1640625, |
| "logits/rejected": 0.1767578125, |
| "logps/chosen": -147.0, |
| "logps/rejected": -144.0, |
| "loss": 0.6125, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.06494140625, |
| "rewards/margins": 0.2373046875, |
| "rewards/rejected": -0.302734375, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.9743589743589743, |
| "grad_norm": 2.0380313396453857, |
| "learning_rate": 1.8890711312996568e-06, |
| "logits/chosen": 0.353515625, |
| "logits/rejected": 0.353515625, |
| "logps/chosen": -110.0, |
| "logps/rejected": -113.0, |
| "loss": 0.6395, |
| "rewards/accuracies": 0.4583333432674408, |
| "rewards/chosen": -0.091796875, |
| "rewards/margins": 0.07666015625, |
| "rewards/rejected": -0.16796875, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.0977935791015625, |
| "learning_rate": 1.881169672443498e-06, |
| "logits/chosen": 0.2275390625, |
| "logits/rejected": 0.2373046875, |
| "logps/chosen": -104.0, |
| "logps/rejected": -128.0, |
| "loss": 0.6076, |
| "rewards/accuracies": 0.7083333134651184, |
| "rewards/chosen": -0.078125, |
| "rewards/margins": 0.2041015625, |
| "rewards/rejected": -0.283203125, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.0256410256410255, |
| "grad_norm": 2.0318846702575684, |
| "learning_rate": 1.873014113161188e-06, |
| "logits/chosen": 0.2060546875, |
| "logits/rejected": 0.1337890625, |
| "logps/chosen": -114.5, |
| "logps/rejected": -120.0, |
| "loss": 0.5905, |
| "rewards/accuracies": 0.7083333134651184, |
| "rewards/chosen": -0.1025390625, |
| "rewards/margins": 0.23828125, |
| "rewards/rejected": -0.341796875, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.0512820512820513, |
| "grad_norm": 1.9920344352722168, |
| "learning_rate": 1.8646068052484753e-06, |
| "logits/chosen": 0.1767578125, |
| "logits/rejected": 0.1533203125, |
| "logps/chosen": -120.5, |
| "logps/rejected": -128.0, |
| "loss": 0.6056, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.0194091796875, |
| "rewards/margins": 0.248046875, |
| "rewards/rejected": -0.267578125, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.0769230769230769, |
| "grad_norm": 1.8763842582702637, |
| "learning_rate": 1.8559501730971543e-06, |
| "logits/chosen": 0.220703125, |
| "logits/rejected": 0.298828125, |
| "logps/chosen": -108.0, |
| "logps/rejected": -131.0, |
| "loss": 0.5767, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.11181640625, |
| "rewards/margins": 0.361328125, |
| "rewards/rejected": -0.47265625, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.1025641025641026, |
| "grad_norm": 1.9118515253067017, |
| "learning_rate": 1.8470467129959508e-06, |
| "logits/chosen": 0.2431640625, |
| "logits/rejected": 0.236328125, |
| "logps/chosen": -98.5, |
| "logps/rejected": -120.5, |
| "loss": 0.578, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.06982421875, |
| "rewards/margins": 0.404296875, |
| "rewards/rejected": -0.47265625, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.1282051282051282, |
| "grad_norm": 2.0091183185577393, |
| "learning_rate": 1.8378989924106735e-06, |
| "logits/chosen": 0.1865234375, |
| "logits/rejected": 0.25, |
| "logps/chosen": -111.5, |
| "logps/rejected": -124.0, |
| "loss": 0.5826, |
| "rewards/accuracies": 0.7083333134651184, |
| "rewards/chosen": -0.10302734375, |
| "rewards/margins": 0.28515625, |
| "rewards/rejected": -0.388671875, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.1538461538461537, |
| "grad_norm": 2.098787307739258, |
| "learning_rate": 1.828509649243842e-06, |
| "logits/chosen": 0.09521484375, |
| "logits/rejected": 0.091796875, |
| "logps/chosen": -127.0, |
| "logps/rejected": -135.0, |
| "loss": 0.5577, |
| "rewards/accuracies": 0.7083333134651184, |
| "rewards/chosen": -0.0986328125, |
| "rewards/margins": 0.255859375, |
| "rewards/rejected": -0.353515625, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.1794871794871795, |
| "grad_norm": 1.9569436311721802, |
| "learning_rate": 1.8188813910740017e-06, |
| "logits/chosen": 0.251953125, |
| "logits/rejected": 0.1982421875, |
| "logps/chosen": -111.0, |
| "logps/rejected": -140.0, |
| "loss": 0.5628, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.1591796875, |
| "rewards/margins": 0.400390625, |
| "rewards/rejected": -0.55859375, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.205128205128205, |
| "grad_norm": 2.0690457820892334, |
| "learning_rate": 1.8090169943749474e-06, |
| "logits/chosen": 0.2041015625, |
| "logits/rejected": 0.1884765625, |
| "logps/chosen": -110.5, |
| "logps/rejected": -123.0, |
| "loss": 0.5893, |
| "rewards/accuracies": 0.5833333134651184, |
| "rewards/chosen": -0.298828125, |
| "rewards/margins": 0.1181640625, |
| "rewards/rejected": -0.416015625, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.2307692307692308, |
| "grad_norm": 2.074464797973633, |
| "learning_rate": 1.7989193037150782e-06, |
| "logits/chosen": 0.26953125, |
| "logits/rejected": 0.265625, |
| "logps/chosen": -110.0, |
| "logps/rejected": -128.0, |
| "loss": 0.5638, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.2412109375, |
| "rewards/margins": 0.314453125, |
| "rewards/rejected": -0.5546875, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.2564102564102564, |
| "grad_norm": 2.286728858947754, |
| "learning_rate": 1.788591230937119e-06, |
| "logits/chosen": 0.09619140625, |
| "logits/rejected": 0.06689453125, |
| "logps/chosen": -113.0, |
| "logps/rejected": -136.0, |
| "loss": 0.5804, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.26171875, |
| "rewards/margins": 0.259765625, |
| "rewards/rejected": -0.5234375, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.282051282051282, |
| "grad_norm": 2.0626838207244873, |
| "learning_rate": 1.7780357543184393e-06, |
| "logits/chosen": 0.1767578125, |
| "logits/rejected": 0.185546875, |
| "logps/chosen": -121.0, |
| "logps/rejected": -115.5, |
| "loss": 0.5309, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.2236328125, |
| "rewards/margins": 0.314453125, |
| "rewards/rejected": -0.5390625, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.3076923076923077, |
| "grad_norm": 2.1246135234832764, |
| "learning_rate": 1.7672559177122163e-06, |
| "logits/chosen": 0.19140625, |
| "logits/rejected": 0.1806640625, |
| "logps/chosen": -118.0, |
| "logps/rejected": -148.0, |
| "loss": 0.5572, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.31640625, |
| "rewards/margins": 0.427734375, |
| "rewards/rejected": -0.7421875, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 1.9632675647735596, |
| "learning_rate": 1.7562548296696873e-06, |
| "logits/chosen": 0.2138671875, |
| "logits/rejected": 0.220703125, |
| "logps/chosen": -103.0, |
| "logps/rejected": -125.5, |
| "loss": 0.5375, |
| "rewards/accuracies": 0.7083333134651184, |
| "rewards/chosen": -0.2255859375, |
| "rewards/margins": 0.353515625, |
| "rewards/rejected": -0.578125, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.358974358974359, |
| "grad_norm": 1.9024431705474854, |
| "learning_rate": 1.745035662543745e-06, |
| "logits/chosen": 0.1787109375, |
| "logits/rejected": 0.1591796875, |
| "logps/chosen": -116.5, |
| "logps/rejected": -146.0, |
| "loss": 0.5115, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.220703125, |
| "rewards/margins": 0.58203125, |
| "rewards/rejected": -0.8046875, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.3846153846153846, |
| "grad_norm": 2.3646910190582275, |
| "learning_rate": 1.7336016515741365e-06, |
| "logits/chosen": 0.189453125, |
| "logits/rejected": 0.1572265625, |
| "logps/chosen": -132.0, |
| "logps/rejected": -156.0, |
| "loss": 0.54, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.2734375, |
| "rewards/margins": 0.64453125, |
| "rewards/rejected": -0.91796875, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.4102564102564101, |
| "grad_norm": 2.328059434890747, |
| "learning_rate": 1.7219560939545242e-06, |
| "logits/chosen": 0.232421875, |
| "logits/rejected": 0.2294921875, |
| "logps/chosen": -96.0, |
| "logps/rejected": -109.0, |
| "loss": 0.5638, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.298828125, |
| "rewards/margins": 0.41015625, |
| "rewards/rejected": -0.7109375, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.435897435897436, |
| "grad_norm": 2.036503791809082, |
| "learning_rate": 1.7101023478816856e-06, |
| "logits/chosen": 0.1669921875, |
| "logits/rejected": 0.2060546875, |
| "logps/chosen": -132.0, |
| "logps/rejected": -131.0, |
| "loss": 0.5173, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.4140625, |
| "rewards/margins": 0.412109375, |
| "rewards/rejected": -0.82421875, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.4615384615384617, |
| "grad_norm": 2.166957378387451, |
| "learning_rate": 1.6980438315871177e-06, |
| "logits/chosen": 0.1669921875, |
| "logits/rejected": 0.2021484375, |
| "logps/chosen": -113.0, |
| "logps/rejected": -119.5, |
| "loss": 0.5193, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.26953125, |
| "rewards/margins": 0.51953125, |
| "rewards/rejected": -0.7890625, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.4871794871794872, |
| "grad_norm": 2.383507490158081, |
| "learning_rate": 1.6857840223513313e-06, |
| "logits/chosen": 0.2060546875, |
| "logits/rejected": 0.19921875, |
| "logps/chosen": -120.0, |
| "logps/rejected": -124.0, |
| "loss": 0.5446, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.361328125, |
| "rewards/margins": 0.431640625, |
| "rewards/rejected": -0.79296875, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.5128205128205128, |
| "grad_norm": 2.0628299713134766, |
| "learning_rate": 1.6733264555011194e-06, |
| "logits/chosen": 0.244140625, |
| "logits/rejected": 0.20703125, |
| "logps/chosen": -137.0, |
| "logps/rejected": -154.0, |
| "loss": 0.5155, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.57421875, |
| "rewards/margins": 0.73828125, |
| "rewards/rejected": -1.3125, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.5384615384615383, |
| "grad_norm": 2.3130903244018555, |
| "learning_rate": 1.6606747233900813e-06, |
| "logits/chosen": 0.2431640625, |
| "logits/rejected": 0.2451171875, |
| "logps/chosen": -96.0, |
| "logps/rejected": -111.5, |
| "loss": 0.5542, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.2294921875, |
| "rewards/margins": 0.7109375, |
| "rewards/rejected": -0.9375, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.564102564102564, |
| "grad_norm": 2.157144784927368, |
| "learning_rate": 1.64783247436271e-06, |
| "logits/chosen": 0.298828125, |
| "logits/rejected": 0.314453125, |
| "logps/chosen": -124.0, |
| "logps/rejected": -143.0, |
| "loss": 0.5016, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.408203125, |
| "rewards/margins": 0.6015625, |
| "rewards/rejected": -1.015625, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.5897435897435899, |
| "grad_norm": 2.006909132003784, |
| "learning_rate": 1.6348034117023256e-06, |
| "logits/chosen": 0.140625, |
| "logits/rejected": 0.1650390625, |
| "logps/chosen": -109.0, |
| "logps/rejected": -137.0, |
| "loss": 0.5129, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.3984375, |
| "rewards/margins": 0.703125, |
| "rewards/rejected": -1.1015625, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.6153846153846154, |
| "grad_norm": 2.5358331203460693, |
| "learning_rate": 1.6215912925631721e-06, |
| "logits/chosen": 0.1962890625, |
| "logits/rejected": 0.21484375, |
| "logps/chosen": -114.0, |
| "logps/rejected": -134.0, |
| "loss": 0.5205, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.478515625, |
| "rewards/margins": 0.74609375, |
| "rewards/rejected": -1.2265625, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.641025641025641, |
| "grad_norm": 2.1718602180480957, |
| "learning_rate": 1.6081999268869763e-06, |
| "logits/chosen": 0.294921875, |
| "logits/rejected": 0.3203125, |
| "logps/chosen": -109.0, |
| "logps/rejected": -130.0, |
| "loss": 0.4935, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.5625, |
| "rewards/margins": 0.50390625, |
| "rewards/rejected": -1.0703125, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 2.2296667098999023, |
| "learning_rate": 1.5946331763042866e-06, |
| "logits/chosen": 0.1357421875, |
| "logits/rejected": 0.2294921875, |
| "logps/chosen": -122.0, |
| "logps/rejected": -114.5, |
| "loss": 0.5057, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.53515625, |
| "rewards/margins": 0.50390625, |
| "rewards/rejected": -1.0390625, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.6923076923076923, |
| "grad_norm": 2.204519748687744, |
| "learning_rate": 1.580894953020904e-06, |
| "logits/chosen": 0.1435546875, |
| "logits/rejected": 0.150390625, |
| "logps/chosen": -113.5, |
| "logps/rejected": -135.0, |
| "loss": 0.5566, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.421875, |
| "rewards/margins": 0.76171875, |
| "rewards/rejected": -1.1875, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.717948717948718, |
| "grad_norm": 2.3120129108428955, |
| "learning_rate": 1.5669892186897316e-06, |
| "logits/chosen": 0.1572265625, |
| "logits/rejected": 0.208984375, |
| "logps/chosen": -124.0, |
| "logps/rejected": -141.0, |
| "loss": 0.4966, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.53515625, |
| "rewards/margins": 1.0, |
| "rewards/rejected": -1.5390625, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.7435897435897436, |
| "grad_norm": 2.3819029331207275, |
| "learning_rate": 1.5529199832683633e-06, |
| "logits/chosen": 0.171875, |
| "logits/rejected": 0.2236328125, |
| "logps/chosen": -128.0, |
| "logps/rejected": -150.0, |
| "loss": 0.4868, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.64453125, |
| "rewards/margins": 0.7734375, |
| "rewards/rejected": -1.421875, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.7692307692307692, |
| "grad_norm": 2.1900620460510254, |
| "learning_rate": 1.5386913038627438e-06, |
| "logits/chosen": 0.1767578125, |
| "logits/rejected": 0.1796875, |
| "logps/chosen": -152.0, |
| "logps/rejected": -139.0, |
| "loss": 0.5002, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.875, |
| "rewards/margins": 0.69921875, |
| "rewards/rejected": -1.5703125, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.7948717948717947, |
| "grad_norm": 2.0419483184814453, |
| "learning_rate": 1.5243072835572316e-06, |
| "logits/chosen": 0.158203125, |
| "logits/rejected": 0.1796875, |
| "logps/chosen": -128.0, |
| "logps/rejected": -138.0, |
| "loss": 0.4862, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.69140625, |
| "rewards/margins": 0.70703125, |
| "rewards/rejected": -1.3984375, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.8205128205128205, |
| "grad_norm": 2.071697473526001, |
| "learning_rate": 1.5097720702314054e-06, |
| "logits/chosen": 0.11083984375, |
| "logits/rejected": 0.16015625, |
| "logps/chosen": -129.0, |
| "logps/rejected": -157.0, |
| "loss": 0.4742, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.66015625, |
| "rewards/margins": 1.140625, |
| "rewards/rejected": -1.8046875, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.8461538461538463, |
| "grad_norm": 2.228717088699341, |
| "learning_rate": 1.4950898553639504e-06, |
| "logits/chosen": 0.06396484375, |
| "logits/rejected": 0.173828125, |
| "logps/chosen": -137.0, |
| "logps/rejected": -140.0, |
| "loss": 0.4845, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.69140625, |
| "rewards/margins": 0.8125, |
| "rewards/rejected": -1.5078125, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.8717948717948718, |
| "grad_norm": 2.5235626697540283, |
| "learning_rate": 1.4802648728239743e-06, |
| "logits/chosen": 0.2421875, |
| "logits/rejected": 0.197265625, |
| "logps/chosen": -98.0, |
| "logps/rejected": -112.5, |
| "loss": 0.5188, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.63671875, |
| "rewards/margins": 0.369140625, |
| "rewards/rejected": -1.0, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.8974358974358974, |
| "grad_norm": 2.2643330097198486, |
| "learning_rate": 1.4653013976500974e-06, |
| "logits/chosen": 0.203125, |
| "logits/rejected": 0.2080078125, |
| "logps/chosen": -143.0, |
| "logps/rejected": -159.0, |
| "loss": 0.4687, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.875, |
| "rewards/margins": 0.8984375, |
| "rewards/rejected": -1.7734375, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.9230769230769231, |
| "grad_norm": 2.175093650817871, |
| "learning_rate": 1.4502037448176732e-06, |
| "logits/chosen": 0.138671875, |
| "logits/rejected": 0.1689453125, |
| "logps/chosen": -114.5, |
| "logps/rejected": -138.0, |
| "loss": 0.5153, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.6171875, |
| "rewards/margins": 0.8671875, |
| "rewards/rejected": -1.484375, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.9487179487179487, |
| "grad_norm": 2.8286232948303223, |
| "learning_rate": 1.4349762679944895e-06, |
| "logits/chosen": 0.15234375, |
| "logits/rejected": 0.154296875, |
| "logps/chosen": -113.5, |
| "logps/rejected": -131.0, |
| "loss": 0.4437, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.74609375, |
| "rewards/margins": 0.78515625, |
| "rewards/rejected": -1.53125, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.9743589743589745, |
| "grad_norm": 2.7549867630004883, |
| "learning_rate": 1.419623358285314e-06, |
| "logits/chosen": 0.142578125, |
| "logits/rejected": 0.11669921875, |
| "logps/chosen": -106.0, |
| "logps/rejected": -130.0, |
| "loss": 0.4782, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.52734375, |
| "rewards/margins": 0.796875, |
| "rewards/rejected": -1.328125, |
| "step": 231 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.993396520614624, |
| "learning_rate": 1.404149442965644e-06, |
| "logits/chosen": 0.234375, |
| "logits/rejected": 0.1884765625, |
| "logps/chosen": -107.0, |
| "logps/rejected": -129.0, |
| "loss": 0.4322, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.80078125, |
| "rewards/margins": 0.89453125, |
| "rewards/rejected": -1.6953125, |
| "step": 234 |
| }, |
| { |
| "epoch": 2.0256410256410255, |
| "grad_norm": 2.6139297485351562, |
| "learning_rate": 1.3885589842050252e-06, |
| "logits/chosen": 0.314453125, |
| "logits/rejected": 0.267578125, |
| "logps/chosen": -94.5, |
| "logps/rejected": -105.0, |
| "loss": 0.479, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.7265625, |
| "rewards/margins": 0.71875, |
| "rewards/rejected": -1.4453125, |
| "step": 237 |
| }, |
| { |
| "epoch": 2.051282051282051, |
| "grad_norm": 1.9830079078674316, |
| "learning_rate": 1.3728564777803086e-06, |
| "logits/chosen": 0.126953125, |
| "logits/rejected": 0.1376953125, |
| "logps/chosen": -111.5, |
| "logps/rejected": -123.5, |
| "loss": 0.4288, |
| "rewards/accuracies": 0.7083333134651184, |
| "rewards/chosen": -0.76953125, |
| "rewards/margins": 0.7265625, |
| "rewards/rejected": -1.5, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.076923076923077, |
| "grad_norm": 2.3474740982055664, |
| "learning_rate": 1.3570464517792152e-06, |
| "logits/chosen": 0.1708984375, |
| "logits/rejected": 0.1572265625, |
| "logps/chosen": -106.0, |
| "logps/rejected": -145.0, |
| "loss": 0.4326, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.73046875, |
| "rewards/margins": 1.4921875, |
| "rewards/rejected": -2.21875, |
| "step": 243 |
| }, |
| { |
| "epoch": 2.1025641025641026, |
| "grad_norm": 2.3264522552490234, |
| "learning_rate": 1.3411334652945848e-06, |
| "logits/chosen": 0.236328125, |
| "logits/rejected": 0.265625, |
| "logps/chosen": -115.0, |
| "logps/rejected": -131.0, |
| "loss": 0.4788, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.03125, |
| "rewards/margins": 0.5234375, |
| "rewards/rejected": -1.546875, |
| "step": 246 |
| }, |
| { |
| "epoch": 2.128205128205128, |
| "grad_norm": 2.1243481636047363, |
| "learning_rate": 1.3251221071096834e-06, |
| "logits/chosen": 0.2255859375, |
| "logits/rejected": 0.1943359375, |
| "logps/chosen": -117.0, |
| "logps/rejected": -120.0, |
| "loss": 0.4144, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.83984375, |
| "rewards/margins": 0.65234375, |
| "rewards/rejected": -1.4921875, |
| "step": 249 |
| }, |
| { |
| "epoch": 2.1538461538461537, |
| "grad_norm": 2.6467950344085693, |
| "learning_rate": 1.3090169943749473e-06, |
| "logits/chosen": 0.22265625, |
| "logits/rejected": 0.25390625, |
| "logps/chosen": -117.5, |
| "logps/rejected": -138.0, |
| "loss": 0.4197, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.81640625, |
| "rewards/margins": 0.9765625, |
| "rewards/rejected": -1.7890625, |
| "step": 252 |
| }, |
| { |
| "epoch": 2.1794871794871793, |
| "grad_norm": 2.3144333362579346, |
| "learning_rate": 1.2928227712765502e-06, |
| "logits/chosen": 0.1650390625, |
| "logits/rejected": 0.1767578125, |
| "logps/chosen": -120.5, |
| "logps/rejected": -131.0, |
| "loss": 0.4023, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.96484375, |
| "rewards/margins": 0.93359375, |
| "rewards/rejected": -1.8984375, |
| "step": 255 |
| }, |
| { |
| "epoch": 2.2051282051282053, |
| "grad_norm": 2.361914873123169, |
| "learning_rate": 1.276544107697171e-06, |
| "logits/chosen": 0.236328125, |
| "logits/rejected": 0.197265625, |
| "logps/chosen": -138.0, |
| "logps/rejected": -149.0, |
| "loss": 0.4806, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -1.1171875, |
| "rewards/margins": 0.92578125, |
| "rewards/rejected": -2.046875, |
| "step": 258 |
| }, |
| { |
| "epoch": 2.230769230769231, |
| "grad_norm": 2.1916277408599854, |
| "learning_rate": 1.260185697869353e-06, |
| "logits/chosen": 0.173828125, |
| "logits/rejected": 0.2060546875, |
| "logps/chosen": -125.0, |
| "logps/rejected": -133.0, |
| "loss": 0.4517, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.84375, |
| "rewards/margins": 1.0859375, |
| "rewards/rejected": -1.9296875, |
| "step": 261 |
| }, |
| { |
| "epoch": 2.2564102564102564, |
| "grad_norm": 2.8083648681640625, |
| "learning_rate": 1.2437522590218416e-06, |
| "logits/chosen": 0.20703125, |
| "logits/rejected": 0.2021484375, |
| "logps/chosen": -109.0, |
| "logps/rejected": -131.0, |
| "loss": 0.4198, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.98046875, |
| "rewards/margins": 1.03125, |
| "rewards/rejected": -2.015625, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.282051282051282, |
| "grad_norm": 3.0645532608032227, |
| "learning_rate": 1.22724853001929e-06, |
| "logits/chosen": 0.1259765625, |
| "logits/rejected": 0.1630859375, |
| "logps/chosen": -113.0, |
| "logps/rejected": -129.0, |
| "loss": 0.4473, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.84765625, |
| "rewards/margins": 1.0703125, |
| "rewards/rejected": -1.921875, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.3076923076923075, |
| "grad_norm": 2.734609842300415, |
| "learning_rate": 1.2106792699957262e-06, |
| "logits/chosen": 0.1328125, |
| "logits/rejected": 0.1103515625, |
| "logps/chosen": -137.0, |
| "logps/rejected": -157.0, |
| "loss": 0.4295, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.25, |
| "rewards/margins": 1.1171875, |
| "rewards/rejected": -2.359375, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 3.1695902347564697, |
| "learning_rate": 1.1940492569821752e-06, |
| "logits/chosen": 0.171875, |
| "logits/rejected": 0.2431640625, |
| "logps/chosen": -129.0, |
| "logps/rejected": -162.0, |
| "loss": 0.4173, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -1.15625, |
| "rewards/margins": 1.453125, |
| "rewards/rejected": -2.625, |
| "step": 273 |
| }, |
| { |
| "epoch": 2.358974358974359, |
| "grad_norm": 2.7723398208618164, |
| "learning_rate": 1.1773632865288308e-06, |
| "logits/chosen": 0.1962890625, |
| "logits/rejected": 0.130859375, |
| "logps/chosen": -126.0, |
| "logps/rejected": -148.0, |
| "loss": 0.4664, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.09375, |
| "rewards/margins": 1.296875, |
| "rewards/rejected": -2.375, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.3846153846153846, |
| "grad_norm": 2.630082607269287, |
| "learning_rate": 1.160626170322177e-06, |
| "logits/chosen": 0.09912109375, |
| "logits/rejected": 0.134765625, |
| "logps/chosen": -119.0, |
| "logps/rejected": -147.0, |
| "loss": 0.4221, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.91015625, |
| "rewards/margins": 1.1875, |
| "rewards/rejected": -2.09375, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.41025641025641, |
| "grad_norm": 2.4502968788146973, |
| "learning_rate": 1.1438427347974552e-06, |
| "logits/chosen": 0.17578125, |
| "logits/rejected": 0.1787109375, |
| "logps/chosen": -130.0, |
| "logps/rejected": -167.0, |
| "loss": 0.3601, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -1.03125, |
| "rewards/margins": 1.40625, |
| "rewards/rejected": -2.4375, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.435897435897436, |
| "grad_norm": 2.8177709579467773, |
| "learning_rate": 1.1270178197468786e-06, |
| "logits/chosen": 0.1044921875, |
| "logits/rejected": 0.0966796875, |
| "logps/chosen": -122.5, |
| "logps/rejected": -147.0, |
| "loss": 0.4305, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.9765625, |
| "rewards/margins": 1.265625, |
| "rewards/rejected": -2.25, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.4615384615384617, |
| "grad_norm": 1.9717708826065063, |
| "learning_rate": 1.1101562769239945e-06, |
| "logits/chosen": 0.20703125, |
| "logits/rejected": 0.2021484375, |
| "logps/chosen": -115.5, |
| "logps/rejected": -141.0, |
| "loss": 0.3989, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -1.0234375, |
| "rewards/margins": 1.375, |
| "rewards/rejected": -2.390625, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.4871794871794872, |
| "grad_norm": 2.79207181930542, |
| "learning_rate": 1.0932629686445985e-06, |
| "logits/chosen": 0.08056640625, |
| "logits/rejected": 0.07568359375, |
| "logps/chosen": -99.0, |
| "logps/rejected": -112.5, |
| "loss": 0.4288, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.796875, |
| "rewards/margins": 1.0234375, |
| "rewards/rejected": -1.828125, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.5128205128205128, |
| "grad_norm": 2.2847957611083984, |
| "learning_rate": 1.0763427663846013e-06, |
| "logits/chosen": 0.07373046875, |
| "logits/rejected": 0.06494140625, |
| "logps/chosen": -118.5, |
| "logps/rejected": -153.0, |
| "loss": 0.3853, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.796875, |
| "rewards/margins": 1.484375, |
| "rewards/rejected": -2.28125, |
| "step": 294 |
| }, |
| { |
| "epoch": 2.5384615384615383, |
| "grad_norm": 2.584763288497925, |
| "learning_rate": 1.0594005493752567e-06, |
| "logits/chosen": 0.1416015625, |
| "logits/rejected": 0.099609375, |
| "logps/chosen": -103.0, |
| "logps/rejected": -112.0, |
| "loss": 0.4753, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.84375, |
| "rewards/margins": 0.8359375, |
| "rewards/rejected": -1.6796875, |
| "step": 297 |
| }, |
| { |
| "epoch": 2.564102564102564, |
| "grad_norm": 2.5454132556915283, |
| "learning_rate": 1.0424412031961483e-06, |
| "logits/chosen": 0.091796875, |
| "logits/rejected": 0.140625, |
| "logps/chosen": -133.0, |
| "logps/rejected": -159.0, |
| "loss": 0.4611, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -1.1640625, |
| "rewards/margins": 1.2265625, |
| "rewards/rejected": -2.390625, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.58974358974359, |
| "grad_norm": 2.596614360809326, |
| "learning_rate": 1.025469618366351e-06, |
| "logits/chosen": 0.10693359375, |
| "logits/rejected": 0.12255859375, |
| "logps/chosen": -127.0, |
| "logps/rejected": -139.0, |
| "loss": 0.3776, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.046875, |
| "rewards/margins": 1.3125, |
| "rewards/rejected": -2.359375, |
| "step": 303 |
| }, |
| { |
| "epoch": 2.6153846153846154, |
| "grad_norm": 2.499739170074463, |
| "learning_rate": 1.0084906889341656e-06, |
| "logits/chosen": 0.1259765625, |
| "logits/rejected": 0.126953125, |
| "logps/chosen": -119.5, |
| "logps/rejected": -153.0, |
| "loss": 0.4089, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.1171875, |
| "rewards/margins": 1.15625, |
| "rewards/rejected": -2.28125, |
| "step": 306 |
| }, |
| { |
| "epoch": 2.641025641025641, |
| "grad_norm": 2.699960231781006, |
| "learning_rate": 9.915093110658345e-07, |
| "logits/chosen": 0.0986328125, |
| "logits/rejected": 0.08349609375, |
| "logps/chosen": -130.0, |
| "logps/rejected": -157.0, |
| "loss": 0.3958, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -1.4140625, |
| "rewards/margins": 1.1484375, |
| "rewards/rejected": -2.5625, |
| "step": 309 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 2.4888288974761963, |
| "learning_rate": 9.745303816336488e-07, |
| "logits/chosen": 0.08447265625, |
| "logits/rejected": 0.1953125, |
| "logps/chosen": -120.5, |
| "logps/rejected": -146.0, |
| "loss": 0.4124, |
| "rewards/accuracies": 0.9583333134651184, |
| "rewards/chosen": -0.875, |
| "rewards/margins": 1.6328125, |
| "rewards/rejected": -2.5, |
| "step": 312 |
| }, |
| { |
| "epoch": 2.6923076923076925, |
| "grad_norm": 1.9904980659484863, |
| "learning_rate": 9.575587968038518e-07, |
| "logits/chosen": 0.1083984375, |
| "logits/rejected": 0.21484375, |
| "logps/chosen": -125.5, |
| "logps/rejected": -149.0, |
| "loss": 0.3721, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.9453125, |
| "rewards/margins": 1.71875, |
| "rewards/rejected": -2.671875, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.717948717948718, |
| "grad_norm": 2.642254590988159, |
| "learning_rate": 9.405994506247431e-07, |
| "logits/chosen": 0.1083984375, |
| "logits/rejected": 0.11376953125, |
| "logps/chosen": -138.0, |
| "logps/rejected": -154.0, |
| "loss": 0.3767, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -1.1171875, |
| "rewards/margins": 1.21875, |
| "rewards/rejected": -2.328125, |
| "step": 318 |
| }, |
| { |
| "epoch": 2.7435897435897436, |
| "grad_norm": 2.751514434814453, |
| "learning_rate": 9.236572336153985e-07, |
| "logits/chosen": 0.236328125, |
| "logits/rejected": 0.1708984375, |
| "logps/chosen": -108.0, |
| "logps/rejected": -150.0, |
| "loss": 0.3958, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.96484375, |
| "rewards/margins": 1.5, |
| "rewards/rejected": -2.46875, |
| "step": 321 |
| }, |
| { |
| "epoch": 2.769230769230769, |
| "grad_norm": 2.73346209526062, |
| "learning_rate": 9.067370313554015e-07, |
| "logits/chosen": 0.150390625, |
| "logits/rejected": 0.130859375, |
| "logps/chosen": -107.0, |
| "logps/rejected": -141.0, |
| "loss": 0.3487, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.83203125, |
| "rewards/margins": 1.7109375, |
| "rewards/rejected": -2.546875, |
| "step": 324 |
| }, |
| { |
| "epoch": 2.7948717948717947, |
| "grad_norm": 2.5458078384399414, |
| "learning_rate": 8.898437230760057e-07, |
| "logits/chosen": 0.208984375, |
| "logits/rejected": 0.13671875, |
| "logps/chosen": -110.5, |
| "logps/rejected": -152.0, |
| "loss": 0.3914, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.89453125, |
| "rewards/margins": 1.40625, |
| "rewards/rejected": -2.296875, |
| "step": 327 |
| }, |
| { |
| "epoch": 2.8205128205128203, |
| "grad_norm": 2.728673219680786, |
| "learning_rate": 8.729821802531212e-07, |
| "logits/chosen": 0.146484375, |
| "logits/rejected": 0.1123046875, |
| "logps/chosen": -136.0, |
| "logps/rejected": -158.0, |
| "loss": 0.419, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.15625, |
| "rewards/margins": 1.4453125, |
| "rewards/rejected": -2.59375, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.8461538461538463, |
| "grad_norm": 2.601808786392212, |
| "learning_rate": 8.561572652025446e-07, |
| "logits/chosen": 0.130859375, |
| "logits/rejected": 0.103515625, |
| "logps/chosen": -117.5, |
| "logps/rejected": -150.0, |
| "loss": 0.372, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -1.109375, |
| "rewards/margins": 1.3046875, |
| "rewards/rejected": -2.40625, |
| "step": 333 |
| }, |
| { |
| "epoch": 2.871794871794872, |
| "grad_norm": 2.5382025241851807, |
| "learning_rate": 8.393738296778227e-07, |
| "logits/chosen": 0.04931640625, |
| "logits/rejected": 0.0257568359375, |
| "logps/chosen": -136.0, |
| "logps/rejected": -136.0, |
| "loss": 0.4305, |
| "rewards/accuracies": 0.7083333134651184, |
| "rewards/chosen": -1.265625, |
| "rewards/margins": 0.7265625, |
| "rewards/rejected": -1.9921875, |
| "step": 336 |
| }, |
| { |
| "epoch": 2.8974358974358974, |
| "grad_norm": 2.9267117977142334, |
| "learning_rate": 8.226367134711691e-07, |
| "logits/chosen": 0.046875, |
| "logits/rejected": 0.12890625, |
| "logps/chosen": -129.0, |
| "logps/rejected": -160.0, |
| "loss": 0.4168, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -1.3046875, |
| "rewards/margins": 1.46875, |
| "rewards/rejected": -2.765625, |
| "step": 339 |
| }, |
| { |
| "epoch": 2.9230769230769234, |
| "grad_norm": 2.3900935649871826, |
| "learning_rate": 8.059507430178246e-07, |
| "logits/chosen": 0.080078125, |
| "logits/rejected": 0.1220703125, |
| "logps/chosen": -146.0, |
| "logps/rejected": -175.0, |
| "loss": 0.4027, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -1.390625, |
| "rewards/margins": 1.609375, |
| "rewards/rejected": -3.0, |
| "step": 342 |
| }, |
| { |
| "epoch": 2.948717948717949, |
| "grad_norm": 2.6986002922058105, |
| "learning_rate": 7.89320730004274e-07, |
| "logits/chosen": 0.1103515625, |
| "logits/rejected": 0.08056640625, |
| "logps/chosen": -105.0, |
| "logps/rejected": -145.0, |
| "loss": 0.4405, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.2265625, |
| "rewards/margins": 1.2109375, |
| "rewards/rejected": -2.4375, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.9743589743589745, |
| "grad_norm": 2.854278564453125, |
| "learning_rate": 7.727514699807101e-07, |
| "logits/chosen": 0.1748046875, |
| "logits/rejected": 0.1796875, |
| "logps/chosen": -116.0, |
| "logps/rejected": -127.0, |
| "loss": 0.394, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.93359375, |
| "rewards/margins": 1.09375, |
| "rewards/rejected": -2.015625, |
| "step": 348 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 2.017193078994751, |
| "learning_rate": 7.562477409781584e-07, |
| "logits/chosen": 0.035888671875, |
| "logits/rejected": 0.08935546875, |
| "logps/chosen": -107.0, |
| "logps/rejected": -125.0, |
| "loss": 0.3628, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.92578125, |
| "rewards/margins": 1.109375, |
| "rewards/rejected": -2.03125, |
| "step": 351 |
| }, |
| { |
| "epoch": 3.0256410256410255, |
| "grad_norm": 2.873739242553711, |
| "learning_rate": 7.39814302130647e-07, |
| "logits/chosen": 0.1572265625, |
| "logits/rejected": 0.1005859375, |
| "logps/chosen": -117.5, |
| "logps/rejected": -140.0, |
| "loss": 0.399, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.890625, |
| "rewards/margins": 1.546875, |
| "rewards/rejected": -2.4375, |
| "step": 354 |
| }, |
| { |
| "epoch": 3.051282051282051, |
| "grad_norm": 2.0300047397613525, |
| "learning_rate": 7.234558923028289e-07, |
| "logits/chosen": 0.0693359375, |
| "logits/rejected": 0.09228515625, |
| "logps/chosen": -116.0, |
| "logps/rejected": -146.0, |
| "loss": 0.3313, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.046875, |
| "rewards/margins": 1.5078125, |
| "rewards/rejected": -2.5625, |
| "step": 357 |
| }, |
| { |
| "epoch": 3.076923076923077, |
| "grad_norm": 3.1289193630218506, |
| "learning_rate": 7.071772287234496e-07, |
| "logits/chosen": 0.001678466796875, |
| "logits/rejected": 0.0296630859375, |
| "logps/chosen": -138.0, |
| "logps/rejected": -174.0, |
| "loss": 0.3334, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -1.1953125, |
| "rewards/margins": 1.640625, |
| "rewards/rejected": -2.84375, |
| "step": 360 |
| }, |
| { |
| "epoch": 3.1025641025641026, |
| "grad_norm": 2.7747201919555664, |
| "learning_rate": 6.909830056250526e-07, |
| "logits/chosen": 0.0093994140625, |
| "logits/rejected": 0.05322265625, |
| "logps/chosen": -139.0, |
| "logps/rejected": -159.0, |
| "loss": 0.3814, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.203125, |
| "rewards/margins": 1.46875, |
| "rewards/rejected": -2.6875, |
| "step": 363 |
| }, |
| { |
| "epoch": 3.128205128205128, |
| "grad_norm": 2.415590286254883, |
| "learning_rate": 6.748778928903165e-07, |
| "logits/chosen": 0.056396484375, |
| "logits/rejected": 0.11181640625, |
| "logps/chosen": -123.0, |
| "logps/rejected": -163.0, |
| "loss": 0.3505, |
| "rewards/accuracies": 0.9583333134651184, |
| "rewards/chosen": -1.2265625, |
| "rewards/margins": 1.8515625, |
| "rewards/rejected": -3.078125, |
| "step": 366 |
| }, |
| { |
| "epoch": 3.1538461538461537, |
| "grad_norm": 2.7427213191986084, |
| "learning_rate": 6.588665347054152e-07, |
| "logits/chosen": -0.031494140625, |
| "logits/rejected": 0.06494140625, |
| "logps/chosen": -135.0, |
| "logps/rejected": -162.0, |
| "loss": 0.3808, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -1.21875, |
| "rewards/margins": 1.578125, |
| "rewards/rejected": -2.796875, |
| "step": 369 |
| }, |
| { |
| "epoch": 3.1794871794871793, |
| "grad_norm": 2.864881753921509, |
| "learning_rate": 6.429535482207846e-07, |
| "logits/chosen": 0.09716796875, |
| "logits/rejected": 0.07958984375, |
| "logps/chosen": -112.0, |
| "logps/rejected": -140.0, |
| "loss": 0.354, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.96875, |
| "rewards/margins": 1.453125, |
| "rewards/rejected": -2.421875, |
| "step": 372 |
| }, |
| { |
| "epoch": 3.2051282051282053, |
| "grad_norm": 2.073596954345703, |
| "learning_rate": 6.271435222196914e-07, |
| "logits/chosen": 0.0291748046875, |
| "logits/rejected": 0.009033203125, |
| "logps/chosen": -139.0, |
| "logps/rejected": -149.0, |
| "loss": 0.315, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.2578125, |
| "rewards/margins": 1.6328125, |
| "rewards/rejected": -2.890625, |
| "step": 375 |
| }, |
| { |
| "epoch": 3.230769230769231, |
| "grad_norm": 2.8894355297088623, |
| "learning_rate": 6.114410157949745e-07, |
| "logits/chosen": 0.018798828125, |
| "logits/rejected": 0.0986328125, |
| "logps/chosen": -123.0, |
| "logps/rejected": -159.0, |
| "loss": 0.4066, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -1.3125, |
| "rewards/margins": 1.3359375, |
| "rewards/rejected": -2.640625, |
| "step": 378 |
| }, |
| { |
| "epoch": 3.2564102564102564, |
| "grad_norm": 3.8194832801818848, |
| "learning_rate": 5.958505570343561e-07, |
| "logits/chosen": 0.1689453125, |
| "logits/rejected": 0.1611328125, |
| "logps/chosen": -110.5, |
| "logps/rejected": -136.0, |
| "loss": 0.4066, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.34375, |
| "rewards/margins": 0.91015625, |
| "rewards/rejected": -2.25, |
| "step": 381 |
| }, |
| { |
| "epoch": 3.282051282051282, |
| "grad_norm": 2.2550456523895264, |
| "learning_rate": 5.803766417146859e-07, |
| "logits/chosen": 0.1865234375, |
| "logits/rejected": 0.1357421875, |
| "logps/chosen": -114.0, |
| "logps/rejected": -158.0, |
| "loss": 0.3373, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -1.3046875, |
| "rewards/margins": 1.75, |
| "rewards/rejected": -3.0625, |
| "step": 384 |
| }, |
| { |
| "epoch": 3.3076923076923075, |
| "grad_norm": 3.2783937454223633, |
| "learning_rate": 5.650237320055106e-07, |
| "logits/chosen": -0.030029296875, |
| "logits/rejected": 0.04052734375, |
| "logps/chosen": -141.0, |
| "logps/rejected": -160.0, |
| "loss": 0.3154, |
| "rewards/accuracies": 0.9583333134651184, |
| "rewards/chosen": -1.3984375, |
| "rewards/margins": 1.796875, |
| "rewards/rejected": -3.203125, |
| "step": 387 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 3.349325180053711, |
| "learning_rate": 5.497962551823266e-07, |
| "logits/chosen": -0.0537109375, |
| "logits/rejected": 0.0213623046875, |
| "logps/chosen": -140.0, |
| "logps/rejected": -175.0, |
| "loss": 0.3752, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.359375, |
| "rewards/margins": 1.625, |
| "rewards/rejected": -2.984375, |
| "step": 390 |
| }, |
| { |
| "epoch": 3.358974358974359, |
| "grad_norm": 2.396205186843872, |
| "learning_rate": 5.346986023499026e-07, |
| "logits/chosen": 0.1240234375, |
| "logits/rejected": 0.126953125, |
| "logps/chosen": -135.0, |
| "logps/rejected": -171.0, |
| "loss": 0.3165, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.328125, |
| "rewards/margins": 1.8984375, |
| "rewards/rejected": -3.21875, |
| "step": 393 |
| }, |
| { |
| "epoch": 3.3846153846153846, |
| "grad_norm": 2.4508631229400635, |
| "learning_rate": 5.197351271760258e-07, |
| "logits/chosen": -0.0263671875, |
| "logits/rejected": 0.0966796875, |
| "logps/chosen": -116.0, |
| "logps/rejected": -130.0, |
| "loss": 0.3593, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.9609375, |
| "rewards/margins": 1.7578125, |
| "rewards/rejected": -2.71875, |
| "step": 396 |
| }, |
| { |
| "epoch": 3.41025641025641, |
| "grad_norm": 3.2673799991607666, |
| "learning_rate": 5.049101446360497e-07, |
| "logits/chosen": 0.134765625, |
| "logits/rejected": 0.2021484375, |
| "logps/chosen": -110.5, |
| "logps/rejected": -133.0, |
| "loss": 0.3686, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.98828125, |
| "rewards/margins": 1.65625, |
| "rewards/rejected": -2.640625, |
| "step": 399 |
| }, |
| { |
| "epoch": 3.435897435897436, |
| "grad_norm": 2.6491222381591797, |
| "learning_rate": 4.902279297685945e-07, |
| "logits/chosen": 0.0693359375, |
| "logits/rejected": 0.1455078125, |
| "logps/chosen": -124.0, |
| "logps/rejected": -137.0, |
| "loss": 0.3663, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.0546875, |
| "rewards/margins": 1.359375, |
| "rewards/rejected": -2.40625, |
| "step": 402 |
| }, |
| { |
| "epoch": 3.4615384615384617, |
| "grad_norm": 3.2985990047454834, |
| "learning_rate": 4.756927164427684e-07, |
| "logits/chosen": 0.12451171875, |
| "logits/rejected": 0.1474609375, |
| "logps/chosen": -122.0, |
| "logps/rejected": -163.0, |
| "loss": 0.3685, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -1.3515625, |
| "rewards/margins": 1.4609375, |
| "rewards/rejected": -2.8125, |
| "step": 405 |
| }, |
| { |
| "epoch": 3.4871794871794872, |
| "grad_norm": 1.697457194328308, |
| "learning_rate": 4.613086961372561e-07, |
| "logits/chosen": 0.181640625, |
| "logits/rejected": 0.23046875, |
| "logps/chosen": -113.0, |
| "logps/rejected": -124.0, |
| "loss": 0.333, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -1.3828125, |
| "rewards/margins": 1.0703125, |
| "rewards/rejected": -2.453125, |
| "step": 408 |
| }, |
| { |
| "epoch": 3.5128205128205128, |
| "grad_norm": 3.6975581645965576, |
| "learning_rate": 4.4708001673163663e-07, |
| "logits/chosen": 0.0174560546875, |
| "logits/rejected": 0.04052734375, |
| "logps/chosen": -120.0, |
| "logps/rejected": -150.0, |
| "loss": 0.3828, |
| "rewards/accuracies": 0.9583333134651184, |
| "rewards/chosen": -1.125, |
| "rewards/margins": 1.7890625, |
| "rewards/rejected": -2.921875, |
| "step": 411 |
| }, |
| { |
| "epoch": 3.5384615384615383, |
| "grad_norm": 2.6693131923675537, |
| "learning_rate": 4.3301078131026823e-07, |
| "logits/chosen": 0.072265625, |
| "logits/rejected": 0.08740234375, |
| "logps/chosen": -140.0, |
| "logps/rejected": -170.0, |
| "loss": 0.3223, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -1.4765625, |
| "rewards/margins": 1.9140625, |
| "rewards/rejected": -3.390625, |
| "step": 414 |
| }, |
| { |
| "epoch": 3.564102564102564, |
| "grad_norm": 3.120739221572876, |
| "learning_rate": 4.191050469790961e-07, |
| "logits/chosen": 0.09033203125, |
| "logits/rejected": 0.12353515625, |
| "logps/chosen": -123.0, |
| "logps/rejected": -155.0, |
| "loss": 0.3776, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -1.5, |
| "rewards/margins": 1.3984375, |
| "rewards/rejected": -2.890625, |
| "step": 417 |
| }, |
| { |
| "epoch": 3.58974358974359, |
| "grad_norm": 2.7248075008392334, |
| "learning_rate": 4.053668236957134e-07, |
| "logits/chosen": 0.072265625, |
| "logits/rejected": 0.09765625, |
| "logps/chosen": -134.0, |
| "logps/rejected": -165.0, |
| "loss": 0.2954, |
| "rewards/accuracies": 0.9583333134651184, |
| "rewards/chosen": -1.265625, |
| "rewards/margins": 2.1875, |
| "rewards/rejected": -3.453125, |
| "step": 420 |
| }, |
| { |
| "epoch": 3.6153846153846154, |
| "grad_norm": 2.782973527908325, |
| "learning_rate": 3.918000731130238e-07, |
| "logits/chosen": 0.1015625, |
| "logits/rejected": 0.123046875, |
| "logps/chosen": -115.0, |
| "logps/rejected": -152.0, |
| "loss": 0.336, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -1.203125, |
| "rewards/margins": 1.828125, |
| "rewards/rejected": -3.03125, |
| "step": 423 |
| }, |
| { |
| "epoch": 3.641025641025641, |
| "grad_norm": 2.623746395111084, |
| "learning_rate": 3.7840870743682784e-07, |
| "logits/chosen": 0.0361328125, |
| "logits/rejected": -0.0135498046875, |
| "logps/chosen": -114.5, |
| "logps/rejected": -144.0, |
| "loss": 0.332, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -1.1796875, |
| "rewards/margins": 1.4140625, |
| "rewards/rejected": -2.59375, |
| "step": 426 |
| }, |
| { |
| "epoch": 3.6666666666666665, |
| "grad_norm": 3.1472394466400146, |
| "learning_rate": 3.651965882976745e-07, |
| "logits/chosen": 0.076171875, |
| "logits/rejected": 0.061279296875, |
| "logps/chosen": -121.0, |
| "logps/rejected": -138.0, |
| "loss": 0.4179, |
| "rewards/accuracies": 0.7083333134651184, |
| "rewards/chosen": -1.5546875, |
| "rewards/margins": 1.1015625, |
| "rewards/rejected": -2.65625, |
| "step": 429 |
| }, |
| { |
| "epoch": 3.6923076923076925, |
| "grad_norm": 3.3787548542022705, |
| "learning_rate": 3.5216752563729004e-07, |
| "logits/chosen": 0.05126953125, |
| "logits/rejected": 0.09716796875, |
| "logps/chosen": -125.0, |
| "logps/rejected": -144.0, |
| "loss": 0.3857, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -1.5, |
| "rewards/margins": 1.546875, |
| "rewards/rejected": -3.046875, |
| "step": 432 |
| }, |
| { |
| "epoch": 3.717948717948718, |
| "grad_norm": 2.2831146717071533, |
| "learning_rate": 3.393252766099187e-07, |
| "logits/chosen": -0.06103515625, |
| "logits/rejected": -0.0380859375, |
| "logps/chosen": -129.0, |
| "logps/rejected": -160.0, |
| "loss": 0.2803, |
| "rewards/accuracies": 0.9583333134651184, |
| "rewards/chosen": -1.2890625, |
| "rewards/margins": 1.984375, |
| "rewards/rejected": -3.265625, |
| "step": 435 |
| }, |
| { |
| "epoch": 3.7435897435897436, |
| "grad_norm": 3.6543710231781006, |
| "learning_rate": 3.2667354449888074e-07, |
| "logits/chosen": 0.00030517578125, |
| "logits/rejected": 0.039794921875, |
| "logps/chosen": -146.0, |
| "logps/rejected": -165.0, |
| "loss": 0.4561, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.8984375, |
| "rewards/margins": 1.2109375, |
| "rewards/rejected": -3.109375, |
| "step": 438 |
| }, |
| { |
| "epoch": 3.769230769230769, |
| "grad_norm": 2.645735025405884, |
| "learning_rate": 3.1421597764866856e-07, |
| "logits/chosen": 0.04248046875, |
| "logits/rejected": 0.0380859375, |
| "logps/chosen": -105.5, |
| "logps/rejected": -143.0, |
| "loss": 0.3253, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.046875, |
| "rewards/margins": 1.609375, |
| "rewards/rejected": -2.65625, |
| "step": 441 |
| }, |
| { |
| "epoch": 3.7948717948717947, |
| "grad_norm": 3.269747018814087, |
| "learning_rate": 3.019561684128823e-07, |
| "logits/chosen": 0.040771484375, |
| "logits/rejected": 0.005584716796875, |
| "logps/chosen": -123.5, |
| "logps/rejected": -143.0, |
| "loss": 0.4199, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -1.25, |
| "rewards/margins": 1.2890625, |
| "rewards/rejected": -2.546875, |
| "step": 444 |
| }, |
| { |
| "epoch": 3.8205128205128203, |
| "grad_norm": 3.2382609844207764, |
| "learning_rate": 2.898976521183143e-07, |
| "logits/chosen": 0.1064453125, |
| "logits/rejected": 0.0830078125, |
| "logps/chosen": -120.5, |
| "logps/rejected": -149.0, |
| "loss": 0.3449, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -1.1328125, |
| "rewards/margins": 1.8125, |
| "rewards/rejected": -2.9375, |
| "step": 447 |
| }, |
| { |
| "epoch": 3.8461538461538463, |
| "grad_norm": 3.1709492206573486, |
| "learning_rate": 2.7804390604547556e-07, |
| "logits/chosen": -0.0198974609375, |
| "logits/rejected": -0.01123046875, |
| "logps/chosen": -135.0, |
| "logps/rejected": -170.0, |
| "loss": 0.3787, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.65625, |
| "rewards/margins": 2.0, |
| "rewards/rejected": -3.65625, |
| "step": 450 |
| }, |
| { |
| "epoch": 3.871794871794872, |
| "grad_norm": 2.88985013961792, |
| "learning_rate": 2.663983484258636e-07, |
| "logits/chosen": 0.031494140625, |
| "logits/rejected": 0.048583984375, |
| "logps/chosen": -137.0, |
| "logps/rejected": -157.0, |
| "loss": 0.3285, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -1.6484375, |
| "rewards/margins": 1.5859375, |
| "rewards/rejected": -3.234375, |
| "step": 453 |
| }, |
| { |
| "epoch": 3.8974358974358974, |
| "grad_norm": 3.0768532752990723, |
| "learning_rate": 2.549643374562549e-07, |
| "logits/chosen": 0.1064453125, |
| "logits/rejected": 0.1376953125, |
| "logps/chosen": -90.5, |
| "logps/rejected": -125.0, |
| "loss": 0.3256, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.140625, |
| "rewards/margins": 1.359375, |
| "rewards/rejected": -2.5, |
| "step": 456 |
| }, |
| { |
| "epoch": 3.9230769230769234, |
| "grad_norm": 2.6071791648864746, |
| "learning_rate": 2.4374517033031283e-07, |
| "logits/chosen": 0.1171875, |
| "logits/rejected": 0.14453125, |
| "logps/chosen": -121.0, |
| "logps/rejected": -146.0, |
| "loss": 0.3638, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -1.3359375, |
| "rewards/margins": 1.5546875, |
| "rewards/rejected": -2.890625, |
| "step": 459 |
| }, |
| { |
| "epoch": 3.948717948717949, |
| "grad_norm": 2.748244285583496, |
| "learning_rate": 2.3274408228778354e-07, |
| "logits/chosen": 0.015625, |
| "logits/rejected": 0.042236328125, |
| "logps/chosen": -112.0, |
| "logps/rejected": -135.0, |
| "loss": 0.3545, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.9375, |
| "rewards/margins": 1.390625, |
| "rewards/rejected": -2.328125, |
| "step": 462 |
| }, |
| { |
| "epoch": 3.9743589743589745, |
| "grad_norm": 2.53035306930542, |
| "learning_rate": 2.219642456815607e-07, |
| "logits/chosen": 0.1640625, |
| "logits/rejected": 0.0693359375, |
| "logps/chosen": -110.0, |
| "logps/rejected": -133.0, |
| "loss": 0.3363, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.95703125, |
| "rewards/margins": 1.6328125, |
| "rewards/rejected": -2.59375, |
| "step": 465 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 2.739515542984009, |
| "learning_rate": 2.1140876906288086e-07, |
| "logits/chosen": 0.005340576171875, |
| "logits/rejected": 0.028076171875, |
| "logps/chosen": -119.0, |
| "logps/rejected": -154.0, |
| "loss": 0.285, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.3203125, |
| "rewards/margins": 1.6484375, |
| "rewards/rejected": -2.96875, |
| "step": 468 |
| } |
| ], |
| "logging_steps": 3, |
| "max_steps": 585, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|