| { | |
| "best_metric": 0.6631070971488953, | |
| "best_model_checkpoint": "./output/checkpoints/2024-05-27_09-03-33/checkpoint-1100", | |
| "epoch": 1.0, | |
| "eval_steps": 100, | |
| "global_step": 1271, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003933910306845004, | |
| "grad_norm": 27.324785232543945, | |
| "learning_rate": 6.25e-07, | |
| "logits/chosen": -0.23312029242515564, | |
| "logits/rejected": -0.7136957049369812, | |
| "logps/chosen": -206.98876953125, | |
| "logps/rejected": -177.72207641601562, | |
| "loss": 0.6946, | |
| "rewards/accuracies": 0.22499999403953552, | |
| "rewards/chosen": -0.0011991311330348253, | |
| "rewards/margins": -0.0031457520090043545, | |
| "rewards/rejected": 0.001946620992384851, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.007867820613690008, | |
| "grad_norm": 26.920639038085938, | |
| "learning_rate": 1.40625e-06, | |
| "logits/chosen": -0.3985660672187805, | |
| "logits/rejected": -0.7379584908485413, | |
| "logps/chosen": -201.005859375, | |
| "logps/rejected": -177.08181762695312, | |
| "loss": 0.688, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.010929527692496777, | |
| "rewards/margins": 0.013672275468707085, | |
| "rewards/rejected": -0.002742747776210308, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.011801730920535013, | |
| "grad_norm": 34.40425109863281, | |
| "learning_rate": 2.1875000000000002e-06, | |
| "logits/chosen": -0.35717901587486267, | |
| "logits/rejected": -0.660548746585846, | |
| "logps/chosen": -217.42825317382812, | |
| "logps/rejected": -194.10195922851562, | |
| "loss": 0.6924, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": 0.034006841480731964, | |
| "rewards/margins": 0.0028066448867321014, | |
| "rewards/rejected": 0.03120020031929016, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.015735641227380016, | |
| "grad_norm": 27.097261428833008, | |
| "learning_rate": 2.96875e-06, | |
| "logits/chosen": -0.3896491825580597, | |
| "logits/rejected": -0.7307055592536926, | |
| "logps/chosen": -209.29373168945312, | |
| "logps/rejected": -179.78488159179688, | |
| "loss": 0.6839, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.08366340398788452, | |
| "rewards/margins": 0.025963936001062393, | |
| "rewards/rejected": 0.05769947171211243, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01966955153422502, | |
| "grad_norm": 29.19064712524414, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "logits/chosen": -0.24666282534599304, | |
| "logits/rejected": -0.7009283900260925, | |
| "logps/chosen": -196.3118438720703, | |
| "logps/rejected": -178.7552032470703, | |
| "loss": 0.683, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.13745614886283875, | |
| "rewards/margins": 0.03245489299297333, | |
| "rewards/rejected": 0.1050012856721878, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.023603461841070025, | |
| "grad_norm": 31.083709716796875, | |
| "learning_rate": 4.53125e-06, | |
| "logits/chosen": -0.3193593919277191, | |
| "logits/rejected": -0.6126649379730225, | |
| "logps/chosen": -208.44863891601562, | |
| "logps/rejected": -184.2353057861328, | |
| "loss": 0.6852, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.22270426154136658, | |
| "rewards/margins": 0.03411892056465149, | |
| "rewards/rejected": 0.18858537077903748, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02753737214791503, | |
| "grad_norm": 25.83799171447754, | |
| "learning_rate": 5.3125e-06, | |
| "logits/chosen": -0.46783486008644104, | |
| "logits/rejected": -0.7504000067710876, | |
| "logps/chosen": -221.98843383789062, | |
| "logps/rejected": -199.54000854492188, | |
| "loss": 0.6705, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.3932109773159027, | |
| "rewards/margins": 0.08941729366779327, | |
| "rewards/rejected": 0.30379369854927063, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.03147128245476003, | |
| "grad_norm": 24.734338760375977, | |
| "learning_rate": 6.093750000000001e-06, | |
| "logits/chosen": -0.3396364748477936, | |
| "logits/rejected": -0.7113901376724243, | |
| "logps/chosen": -196.3134765625, | |
| "logps/rejected": -179.5933380126953, | |
| "loss": 0.6879, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.4551575779914856, | |
| "rewards/margins": 0.05487058684229851, | |
| "rewards/rejected": 0.4002869725227356, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03540519276160504, | |
| "grad_norm": 28.34064292907715, | |
| "learning_rate": 6.718750000000001e-06, | |
| "logits/chosen": -0.667598307132721, | |
| "logits/rejected": -1.014026403427124, | |
| "logps/chosen": -196.5115966796875, | |
| "logps/rejected": -165.67092895507812, | |
| "loss": 0.6852, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.5074445605278015, | |
| "rewards/margins": 0.09343204647302628, | |
| "rewards/rejected": 0.41401252150535583, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.03933910306845004, | |
| "grad_norm": 30.12347984313965, | |
| "learning_rate": 7.500000000000001e-06, | |
| "logits/chosen": -0.2210284173488617, | |
| "logits/rejected": -0.32401731610298157, | |
| "logps/chosen": -210.63818359375, | |
| "logps/rejected": -205.76895141601562, | |
| "loss": 0.6641, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.5923845171928406, | |
| "rewards/margins": 0.12340062856674194, | |
| "rewards/rejected": 0.4689839482307434, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.043273013375295044, | |
| "grad_norm": 47.19338607788086, | |
| "learning_rate": 8.281250000000001e-06, | |
| "logits/chosen": -0.5629546642303467, | |
| "logits/rejected": -0.7718995213508606, | |
| "logps/chosen": -194.5259552001953, | |
| "logps/rejected": -179.5989532470703, | |
| "loss": 0.6902, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.6510985493659973, | |
| "rewards/margins": 0.10642552375793457, | |
| "rewards/rejected": 0.544672966003418, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.04720692368214005, | |
| "grad_norm": 23.202775955200195, | |
| "learning_rate": 9.0625e-06, | |
| "logits/chosen": -0.3029821217060089, | |
| "logits/rejected": -0.7788914442062378, | |
| "logps/chosen": -214.9969940185547, | |
| "logps/rejected": -167.64263916015625, | |
| "loss": 0.6472, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.6703575849533081, | |
| "rewards/margins": 0.22439488768577576, | |
| "rewards/rejected": 0.44596266746520996, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.05114083398898505, | |
| "grad_norm": 35.26408386230469, | |
| "learning_rate": 9.84375e-06, | |
| "logits/chosen": -0.4141275882720947, | |
| "logits/rejected": -0.7083785533905029, | |
| "logps/chosen": -212.9031524658203, | |
| "logps/rejected": -198.8483428955078, | |
| "loss": 0.662, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.7610660791397095, | |
| "rewards/margins": 0.2469903975725174, | |
| "rewards/rejected": 0.514075756072998, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.05507474429583006, | |
| "grad_norm": 19.10537338256836, | |
| "learning_rate": 1.0625e-05, | |
| "logits/chosen": -0.4033733308315277, | |
| "logits/rejected": -0.7651963829994202, | |
| "logps/chosen": -212.84487915039062, | |
| "logps/rejected": -174.28073120117188, | |
| "loss": 0.6534, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.7663796544075012, | |
| "rewards/margins": 0.24841317534446716, | |
| "rewards/rejected": 0.5179664492607117, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.059008654602675056, | |
| "grad_norm": 26.261890411376953, | |
| "learning_rate": 1.1406250000000001e-05, | |
| "logits/chosen": -0.10389180481433868, | |
| "logits/rejected": -0.5258628129959106, | |
| "logps/chosen": -206.84921264648438, | |
| "logps/rejected": -186.50869750976562, | |
| "loss": 0.6808, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.8156352043151855, | |
| "rewards/margins": 0.17864595353603363, | |
| "rewards/rejected": 0.6369892358779907, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.06294256490952006, | |
| "grad_norm": 32.33486557006836, | |
| "learning_rate": 1.2187500000000001e-05, | |
| "logits/chosen": -0.22502727806568146, | |
| "logits/rejected": -0.49946776032447815, | |
| "logps/chosen": -209.71426391601562, | |
| "logps/rejected": -198.34292602539062, | |
| "loss": 0.6852, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.6370053291320801, | |
| "rewards/margins": 0.15727970004081726, | |
| "rewards/rejected": 0.4797256886959076, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.06687647521636507, | |
| "grad_norm": 38.13333511352539, | |
| "learning_rate": 1.2968750000000002e-05, | |
| "logits/chosen": -0.25742509961128235, | |
| "logits/rejected": -0.7358572483062744, | |
| "logps/chosen": -206.3865966796875, | |
| "logps/rejected": -178.12637329101562, | |
| "loss": 0.6652, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.3785225450992584, | |
| "rewards/margins": 0.16723336279392242, | |
| "rewards/rejected": 0.2112891674041748, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.07081038552321008, | |
| "grad_norm": 23.647096633911133, | |
| "learning_rate": 1.375e-05, | |
| "logits/chosen": -0.3365253806114197, | |
| "logits/rejected": -0.5771717429161072, | |
| "logps/chosen": -208.416748046875, | |
| "logps/rejected": -184.40476989746094, | |
| "loss": 0.7024, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.2850777506828308, | |
| "rewards/margins": 0.14986075460910797, | |
| "rewards/rejected": 0.13521698117256165, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.07474429583005507, | |
| "grad_norm": 22.20098114013672, | |
| "learning_rate": 1.453125e-05, | |
| "logits/chosen": -0.21254411339759827, | |
| "logits/rejected": -0.6303216218948364, | |
| "logps/chosen": -201.83139038085938, | |
| "logps/rejected": -183.7214813232422, | |
| "loss": 0.6843, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.3604539632797241, | |
| "rewards/margins": 0.1408630609512329, | |
| "rewards/rejected": 0.2195909023284912, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.07867820613690008, | |
| "grad_norm": 29.343482971191406, | |
| "learning_rate": 1.5312500000000003e-05, | |
| "logits/chosen": -0.41852107644081116, | |
| "logits/rejected": -0.7636915445327759, | |
| "logps/chosen": -208.08035278320312, | |
| "logps/rejected": -178.69972229003906, | |
| "loss": 0.6731, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.4267478585243225, | |
| "rewards/margins": 0.16477522253990173, | |
| "rewards/rejected": 0.2619726061820984, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07867820613690008, | |
| "eval_logits/chosen": 1.3246409893035889, | |
| "eval_logits/rejected": 1.0977884531021118, | |
| "eval_logps/chosen": -206.3737030029297, | |
| "eval_logps/rejected": -179.28366088867188, | |
| "eval_loss": 0.6665228009223938, | |
| "eval_rewards/accuracies": 0.635937511920929, | |
| "eval_rewards/chosen": 0.6386381387710571, | |
| "eval_rewards/margins": 0.19896559417247772, | |
| "eval_rewards/rejected": 0.4396725594997406, | |
| "eval_runtime": 307.3381, | |
| "eval_samples_per_second": 2.082, | |
| "eval_steps_per_second": 0.13, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.08261211644374508, | |
| "grad_norm": 24.263774871826172, | |
| "learning_rate": 1.609375e-05, | |
| "logits/chosen": -0.16335585713386536, | |
| "logits/rejected": -0.4457281231880188, | |
| "logps/chosen": -201.37017822265625, | |
| "logps/rejected": -176.67379760742188, | |
| "loss": 0.6641, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.6822856068611145, | |
| "rewards/margins": 0.18253257870674133, | |
| "rewards/rejected": 0.4997529983520508, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.08654602675059009, | |
| "grad_norm": 25.775903701782227, | |
| "learning_rate": 1.6875e-05, | |
| "logits/chosen": -0.436201810836792, | |
| "logits/rejected": -0.9347764849662781, | |
| "logps/chosen": -195.61062622070312, | |
| "logps/rejected": -169.15048217773438, | |
| "loss": 0.6596, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.7915258407592773, | |
| "rewards/margins": 0.2815794348716736, | |
| "rewards/rejected": 0.5099464654922485, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0904799370574351, | |
| "grad_norm": 30.208763122558594, | |
| "learning_rate": 1.7656250000000002e-05, | |
| "logits/chosen": -0.5659558176994324, | |
| "logits/rejected": -0.855063796043396, | |
| "logps/chosen": -198.71206665039062, | |
| "logps/rejected": -174.78524780273438, | |
| "loss": 0.7202, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.6501097679138184, | |
| "rewards/margins": 0.17246408760547638, | |
| "rewards/rejected": 0.4776456952095032, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.0944138473642801, | |
| "grad_norm": 23.550596237182617, | |
| "learning_rate": 1.84375e-05, | |
| "logits/chosen": -0.5133547186851501, | |
| "logits/rejected": -0.734718382358551, | |
| "logps/chosen": -193.6223602294922, | |
| "logps/rejected": -179.42771911621094, | |
| "loss": 0.7313, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.5589785575866699, | |
| "rewards/margins": 0.10251788794994354, | |
| "rewards/rejected": 0.4564606547355652, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0983477576711251, | |
| "grad_norm": 29.921533584594727, | |
| "learning_rate": 1.9062500000000003e-05, | |
| "logits/chosen": -0.3889247179031372, | |
| "logits/rejected": -0.6225888133049011, | |
| "logps/chosen": -187.0243377685547, | |
| "logps/rejected": -176.29808044433594, | |
| "loss": 0.6273, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.40915530920028687, | |
| "rewards/margins": 0.3502606451511383, | |
| "rewards/rejected": 0.058894671499729156, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.1022816679779701, | |
| "grad_norm": 29.90145492553711, | |
| "learning_rate": 1.984375e-05, | |
| "logits/chosen": -0.34609144926071167, | |
| "logits/rejected": -0.7598401308059692, | |
| "logps/chosen": -201.13104248046875, | |
| "logps/rejected": -173.50753784179688, | |
| "loss": 0.6626, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.1300664246082306, | |
| "rewards/margins": 0.21786466240882874, | |
| "rewards/rejected": -0.08779821544885635, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.10621557828481511, | |
| "grad_norm": 23.906503677368164, | |
| "learning_rate": 1.9999395643917957e-05, | |
| "logits/chosen": -0.41295546293258667, | |
| "logits/rejected": -0.8447906374931335, | |
| "logps/chosen": -201.5752716064453, | |
| "logps/rejected": -165.7244415283203, | |
| "loss": 0.6405, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.21347875893115997, | |
| "rewards/margins": 0.3085169196128845, | |
| "rewards/rejected": -0.09503819793462753, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.11014948859166011, | |
| "grad_norm": 25.38832664489746, | |
| "learning_rate": 1.999694057253083e-05, | |
| "logits/chosen": -0.2702675759792328, | |
| "logits/rejected": -0.6757915019989014, | |
| "logps/chosen": -198.8104705810547, | |
| "logps/rejected": -175.73355102539062, | |
| "loss": 0.6331, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.7145684361457825, | |
| "rewards/margins": 0.3979041576385498, | |
| "rewards/rejected": 0.31666427850723267, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.11408339889850512, | |
| "grad_norm": 25.388601303100586, | |
| "learning_rate": 1.9992597476892096e-05, | |
| "logits/chosen": -0.20559760928153992, | |
| "logits/rejected": -0.6221147775650024, | |
| "logps/chosen": -203.33877563476562, | |
| "logps/rejected": -177.6593780517578, | |
| "loss": 0.6278, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.8970493078231812, | |
| "rewards/margins": 0.41804951429367065, | |
| "rewards/rejected": 0.4789998531341553, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.11801730920535011, | |
| "grad_norm": 26.157350540161133, | |
| "learning_rate": 1.9986367177239688e-05, | |
| "logits/chosen": -0.34933823347091675, | |
| "logits/rejected": -0.5474187135696411, | |
| "logps/chosen": -192.22409057617188, | |
| "logps/rejected": -179.11972045898438, | |
| "loss": 0.7403, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.7421566843986511, | |
| "rewards/margins": 0.24275951087474823, | |
| "rewards/rejected": 0.4993972182273865, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.12195121951219512, | |
| "grad_norm": 27.657987594604492, | |
| "learning_rate": 1.9978250850229278e-05, | |
| "logits/chosen": -0.5602678060531616, | |
| "logits/rejected": -0.7431076765060425, | |
| "logps/chosen": -197.28172302246094, | |
| "logps/rejected": -180.0853271484375, | |
| "loss": 0.718, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.6257942914962769, | |
| "rewards/margins": 0.286087304353714, | |
| "rewards/rejected": 0.3397069573402405, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.12588512981904013, | |
| "grad_norm": 27.8662166595459, | |
| "learning_rate": 1.996825002871205e-05, | |
| "logits/chosen": -0.3598572611808777, | |
| "logits/rejected": -0.8388012647628784, | |
| "logps/chosen": -192.58541870117188, | |
| "logps/rejected": -165.87228393554688, | |
| "loss": 0.6815, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.49491995573043823, | |
| "rewards/margins": 0.3221299648284912, | |
| "rewards/rejected": 0.1727900207042694, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.12981904012588513, | |
| "grad_norm": 21.444156646728516, | |
| "learning_rate": 1.9956366601445212e-05, | |
| "logits/chosen": -0.18239173293113708, | |
| "logits/rejected": -0.6315879225730896, | |
| "logps/chosen": -214.19509887695312, | |
| "logps/rejected": -185.4246368408203, | |
| "loss": 0.6328, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.5859188437461853, | |
| "rewards/margins": 0.4131649136543274, | |
| "rewards/rejected": 0.1727539300918579, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.13375295043273014, | |
| "grad_norm": 22.295812606811523, | |
| "learning_rate": 1.994260281273529e-05, | |
| "logits/chosen": -0.27679482102394104, | |
| "logits/rejected": -0.7712021470069885, | |
| "logps/chosen": -206.1096954345703, | |
| "logps/rejected": -173.62576293945312, | |
| "loss": 0.6613, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.6996821165084839, | |
| "rewards/margins": 0.3059811294078827, | |
| "rewards/rejected": 0.3937010169029236, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.13768686073957515, | |
| "grad_norm": 33.50761413574219, | |
| "learning_rate": 1.9926961262014237e-05, | |
| "logits/chosen": -0.3116024136543274, | |
| "logits/rejected": -0.625832736492157, | |
| "logps/chosen": -219.8788604736328, | |
| "logps/rejected": -187.32510375976562, | |
| "loss": 0.746, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 1.1785697937011719, | |
| "rewards/margins": 0.21889865398406982, | |
| "rewards/rejected": 0.9596711993217468, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.14162077104642015, | |
| "grad_norm": 15.657761573791504, | |
| "learning_rate": 1.9909444903348546e-05, | |
| "logits/chosen": -0.005524394102394581, | |
| "logits/rejected": -0.3487216532230377, | |
| "logps/chosen": -228.5839385986328, | |
| "logps/rejected": -201.77001953125, | |
| "loss": 0.7435, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 1.1422548294067383, | |
| "rewards/margins": 0.15804262459278107, | |
| "rewards/rejected": 0.9842122793197632, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.14555468135326516, | |
| "grad_norm": 26.140518188476562, | |
| "learning_rate": 1.9890057044881308e-05, | |
| "logits/chosen": -0.12314258515834808, | |
| "logits/rejected": -0.5814956426620483, | |
| "logps/chosen": -201.1555633544922, | |
| "logps/rejected": -167.4046173095703, | |
| "loss": 0.6795, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 1.0470006465911865, | |
| "rewards/margins": 0.35150283575057983, | |
| "rewards/rejected": 0.6954978108406067, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.14948859166011014, | |
| "grad_norm": 19.782007217407227, | |
| "learning_rate": 1.9868801348207467e-05, | |
| "logits/chosen": -0.11235501617193222, | |
| "logits/rejected": -0.5538455247879028, | |
| "logps/chosen": -204.25839233398438, | |
| "logps/rejected": -181.46743774414062, | |
| "loss": 0.685, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 1.1285860538482666, | |
| "rewards/margins": 0.33753544092178345, | |
| "rewards/rejected": 0.7910505533218384, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.15342250196695514, | |
| "grad_norm": 19.97163200378418, | |
| "learning_rate": 1.9845681827682263e-05, | |
| "logits/chosen": -0.16671855747699738, | |
| "logits/rejected": -0.540806233882904, | |
| "logps/chosen": -194.2422332763672, | |
| "logps/rejected": -163.8104705810547, | |
| "loss": 0.6713, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.6743755340576172, | |
| "rewards/margins": 0.26031339168548584, | |
| "rewards/rejected": 0.4140622019767761, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.15735641227380015, | |
| "grad_norm": 18.71397590637207, | |
| "learning_rate": 1.982070284966309e-05, | |
| "logits/chosen": -0.1493137627840042, | |
| "logits/rejected": -0.43618321418762207, | |
| "logps/chosen": -202.78318786621094, | |
| "logps/rejected": -177.56668090820312, | |
| "loss": 0.6528, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.5738676190376282, | |
| "rewards/margins": 0.31430238485336304, | |
| "rewards/rejected": 0.25956520438194275, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.15735641227380015, | |
| "eval_logits/chosen": 1.3314845561981201, | |
| "eval_logits/rejected": 1.1080169677734375, | |
| "eval_logps/chosen": -206.4569549560547, | |
| "eval_logps/rejected": -179.43057250976562, | |
| "eval_loss": 0.6942009329795837, | |
| "eval_rewards/accuracies": 0.604687511920929, | |
| "eval_rewards/chosen": 0.6053363680839539, | |
| "eval_rewards/margins": 0.2244330197572708, | |
| "eval_rewards/rejected": 0.38090336322784424, | |
| "eval_runtime": 309.8464, | |
| "eval_samples_per_second": 2.066, | |
| "eval_steps_per_second": 0.129, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.16129032258064516, | |
| "grad_norm": 15.415759086608887, | |
| "learning_rate": 1.9793869131684884e-05, | |
| "logits/chosen": -0.08272367715835571, | |
| "logits/rejected": -0.4305300712585449, | |
| "logps/chosen": -196.86305236816406, | |
| "logps/rejected": -178.54037475585938, | |
| "loss": 0.7078, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.5516935586929321, | |
| "rewards/margins": 0.21864008903503418, | |
| "rewards/rejected": 0.33305343985557556, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.16522423288749016, | |
| "grad_norm": 28.38641929626465, | |
| "learning_rate": 1.9765185741569126e-05, | |
| "logits/chosen": -0.14836929738521576, | |
| "logits/rejected": -0.4139153063297272, | |
| "logps/chosen": -215.8746795654297, | |
| "logps/rejected": -190.37954711914062, | |
| "loss": 0.7474, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.6253814697265625, | |
| "rewards/margins": 0.11703801155090332, | |
| "rewards/rejected": 0.5083434581756592, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.16915814319433517, | |
| "grad_norm": 23.663591384887695, | |
| "learning_rate": 1.9734658096466774e-05, | |
| "logits/chosen": 0.011041751131415367, | |
| "logits/rejected": -0.4074042737483978, | |
| "logps/chosen": -209.1394500732422, | |
| "logps/rejected": -178.0277099609375, | |
| "loss": 0.6711, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.3468799591064453, | |
| "rewards/margins": 0.2509341835975647, | |
| "rewards/rejected": 0.09594579041004181, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.17309205350118018, | |
| "grad_norm": 32.677852630615234, | |
| "learning_rate": 1.970229196183516e-05, | |
| "logits/chosen": -0.020372604951262474, | |
| "logits/rejected": -0.37563034892082214, | |
| "logps/chosen": -209.47402954101562, | |
| "logps/rejected": -177.0091094970703, | |
| "loss": 0.6983, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.594801127910614, | |
| "rewards/margins": 0.25161081552505493, | |
| "rewards/rejected": 0.34319034218788147, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.17702596380802518, | |
| "grad_norm": 22.306182861328125, | |
| "learning_rate": 1.9668093450349125e-05, | |
| "logits/chosen": -0.1756196916103363, | |
| "logits/rejected": -0.5201798677444458, | |
| "logps/chosen": -217.6730499267578, | |
| "logps/rejected": -185.24819946289062, | |
| "loss": 0.6923, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.9194382429122925, | |
| "rewards/margins": 0.3321036696434021, | |
| "rewards/rejected": 0.5873345136642456, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.1809598741148702, | |
| "grad_norm": 31.994035720825195, | |
| "learning_rate": 1.9632069020746574e-05, | |
| "logits/chosen": -0.3013627529144287, | |
| "logits/rejected": -0.7145218849182129, | |
| "logps/chosen": -206.0642547607422, | |
| "logps/rejected": -178.27896118164062, | |
| "loss": 0.6459, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.8781298398971558, | |
| "rewards/margins": 0.5241779088973999, | |
| "rewards/rejected": 0.353952020406723, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1848937844217152, | |
| "grad_norm": 29.714988708496094, | |
| "learning_rate": 1.959422547660869e-05, | |
| "logits/chosen": -0.2492908537387848, | |
| "logits/rejected": -0.779377818107605, | |
| "logps/chosen": -198.94345092773438, | |
| "logps/rejected": -169.714599609375, | |
| "loss": 0.6366, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.5711004734039307, | |
| "rewards/margins": 0.413928359746933, | |
| "rewards/rejected": 0.15717211365699768, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.1888276947285602, | |
| "grad_norm": 24.506587982177734, | |
| "learning_rate": 1.955456996507499e-05, | |
| "logits/chosen": -0.019927600398659706, | |
| "logits/rejected": -0.43524104356765747, | |
| "logps/chosen": -197.2928009033203, | |
| "logps/rejected": -168.06382751464844, | |
| "loss": 0.6361, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.787044107913971, | |
| "rewards/margins": 0.3754611909389496, | |
| "rewards/rejected": 0.4115828573703766, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.19276160503540518, | |
| "grad_norm": 24.652503967285156, | |
| "learning_rate": 1.9513109975493553e-05, | |
| "logits/chosen": -0.30659085512161255, | |
| "logits/rejected": -0.6158447265625, | |
| "logps/chosen": -207.3615264892578, | |
| "logps/rejected": -198.04635620117188, | |
| "loss": 0.6338, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.8901578783988953, | |
| "rewards/margins": 0.45508089661598206, | |
| "rewards/rejected": 0.4350770115852356, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.1966955153422502, | |
| "grad_norm": 22.106698989868164, | |
| "learning_rate": 1.9469853338006515e-05, | |
| "logits/chosen": -0.07243610918521881, | |
| "logits/rejected": -0.2781897187232971, | |
| "logps/chosen": -203.30215454101562, | |
| "logps/rejected": -188.57080078125, | |
| "loss": 0.7046, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.7840886116027832, | |
| "rewards/margins": 0.2757692337036133, | |
| "rewards/rejected": 0.5083193778991699, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2006294256490952, | |
| "grad_norm": 17.76561164855957, | |
| "learning_rate": 1.9424808222071337e-05, | |
| "logits/chosen": -0.1372375786304474, | |
| "logits/rejected": -0.4728778898715973, | |
| "logps/chosen": -218.58462524414062, | |
| "logps/rejected": -192.29983520507812, | |
| "loss": 0.622, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.8517538905143738, | |
| "rewards/margins": 0.47115468978881836, | |
| "rewards/rejected": 0.3805992603302002, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.2045633359559402, | |
| "grad_norm": 21.741724014282227, | |
| "learning_rate": 1.9377983134917868e-05, | |
| "logits/chosen": -0.42930954694747925, | |
| "logits/rejected": -0.6508566737174988, | |
| "logps/chosen": -196.40382385253906, | |
| "logps/rejected": -180.81784057617188, | |
| "loss": 0.6814, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.42375677824020386, | |
| "rewards/margins": 0.3472265601158142, | |
| "rewards/rejected": 0.07653021067380905, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.2084972462627852, | |
| "grad_norm": 25.856201171875, | |
| "learning_rate": 1.9329386919941694e-05, | |
| "logits/chosen": -0.5100887417793274, | |
| "logits/rejected": -0.896782398223877, | |
| "logps/chosen": -200.4944610595703, | |
| "logps/rejected": -168.5055694580078, | |
| "loss": 0.631, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.5903893709182739, | |
| "rewards/margins": 0.45923447608947754, | |
| "rewards/rejected": 0.13115492463111877, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.21243115656963021, | |
| "grad_norm": 21.10732078552246, | |
| "learning_rate": 1.927902875503397e-05, | |
| "logits/chosen": -0.2257436066865921, | |
| "logits/rejected": -0.6618258953094482, | |
| "logps/chosen": -216.7244415283203, | |
| "logps/rejected": -172.3234405517578, | |
| "loss": 0.6436, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.9558561444282532, | |
| "rewards/margins": 0.5136295557022095, | |
| "rewards/rejected": 0.44222649931907654, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.21636506687647522, | |
| "grad_norm": 21.297080993652344, | |
| "learning_rate": 1.9226918150848067e-05, | |
| "logits/chosen": -0.325428307056427, | |
| "logits/rejected": -0.6309774518013, | |
| "logps/chosen": -190.9318389892578, | |
| "logps/rejected": -179.4983673095703, | |
| "loss": 0.6904, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.1083195209503174, | |
| "rewards/margins": 0.31002935767173767, | |
| "rewards/rejected": 0.7982901334762573, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.22029897718332023, | |
| "grad_norm": 21.540422439575195, | |
| "learning_rate": 1.9173064949003408e-05, | |
| "logits/chosen": -0.05009857565164566, | |
| "logits/rejected": -0.3596547245979309, | |
| "logps/chosen": -200.29823303222656, | |
| "logps/rejected": -180.3629150390625, | |
| "loss": 0.6645, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.4049633741378784, | |
| "rewards/margins": 0.5104038119316101, | |
| "rewards/rejected": 0.8945595026016235, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.22423288749016523, | |
| "grad_norm": null, | |
| "learning_rate": 1.9128734540932494e-05, | |
| "logits/chosen": -0.3485383987426758, | |
| "logits/rejected": -0.5194178223609924, | |
| "logps/chosen": -197.75784301757812, | |
| "logps/rejected": -181.0018768310547, | |
| "loss": 0.7351, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 1.2131370306015015, | |
| "rewards/margins": 0.22653412818908691, | |
| "rewards/rejected": 0.9866029620170593, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.22816679779701024, | |
| "grad_norm": 24.915868759155273, | |
| "learning_rate": 1.9071770513468988e-05, | |
| "logits/chosen": -0.17852464318275452, | |
| "logits/rejected": -0.35372194647789, | |
| "logps/chosen": -193.89865112304688, | |
| "logps/rejected": -187.19973754882812, | |
| "loss": 0.7047, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.9371153116226196, | |
| "rewards/margins": 0.20712292194366455, | |
| "rewards/rejected": 0.7299925088882446, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.23210070810385522, | |
| "grad_norm": 19.513757705688477, | |
| "learning_rate": 1.901309318956141e-05, | |
| "logits/chosen": -0.4217872619628906, | |
| "logits/rejected": -0.7518173456192017, | |
| "logps/chosen": -194.53421020507812, | |
| "logps/rejected": -168.0951385498047, | |
| "loss": 0.7308, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.6975895166397095, | |
| "rewards/margins": 0.23306536674499512, | |
| "rewards/rejected": 0.46452417969703674, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.23603461841070023, | |
| "grad_norm": 18.220582962036133, | |
| "learning_rate": 1.8952713651021227e-05, | |
| "logits/chosen": -0.14223751425743103, | |
| "logits/rejected": -0.4979272484779358, | |
| "logps/chosen": -199.91549682617188, | |
| "logps/rejected": -177.2222900390625, | |
| "loss": 0.6827, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.819624125957489, | |
| "rewards/margins": 0.39503517746925354, | |
| "rewards/rejected": 0.42458897829055786, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.23603461841070023, | |
| "eval_logits/chosen": 1.2563122510910034, | |
| "eval_logits/rejected": 1.0339769124984741, | |
| "eval_logps/chosen": -206.1991424560547, | |
| "eval_logps/rejected": -179.33786010742188, | |
| "eval_loss": 0.7167426347732544, | |
| "eval_rewards/accuracies": 0.6171875, | |
| "eval_rewards/chosen": 0.708461582660675, | |
| "eval_rewards/margins": 0.2904762327671051, | |
| "eval_rewards/rejected": 0.41798537969589233, | |
| "eval_runtime": 284.7459, | |
| "eval_samples_per_second": 2.248, | |
| "eval_steps_per_second": 0.14, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.23996852871754523, | |
| "grad_norm": 23.576587677001953, | |
| "learning_rate": 1.8890643301140487e-05, | |
| "logits/chosen": -0.5384713411331177, | |
| "logits/rejected": -0.8448705673217773, | |
| "logps/chosen": -197.2958526611328, | |
| "logps/rejected": -165.64370727539062, | |
| "loss": 0.6409, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.6531845331192017, | |
| "rewards/margins": 0.39299410581588745, | |
| "rewards/rejected": 0.2601904273033142, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.24390243902439024, | |
| "grad_norm": 18.40612030029297, | |
| "learning_rate": 1.8826893862538233e-05, | |
| "logits/chosen": -0.3022890090942383, | |
| "logits/rejected": -0.5158249735832214, | |
| "logps/chosen": -207.9346160888672, | |
| "logps/rejected": -193.0900115966797, | |
| "loss": 0.7895, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.7925726771354675, | |
| "rewards/margins": 0.08937112987041473, | |
| "rewards/rejected": 0.7032015919685364, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.24783634933123525, | |
| "grad_norm": 18.7589168548584, | |
| "learning_rate": 1.8761477374946548e-05, | |
| "logits/chosen": -0.12031130492687225, | |
| "logits/rejected": -0.4747944474220276, | |
| "logps/chosen": -211.0299530029297, | |
| "logps/rejected": -186.3873291015625, | |
| "loss": 0.6952, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.9918599128723145, | |
| "rewards/margins": 0.28354746103286743, | |
| "rewards/rejected": 0.7083123922348022, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.25177025963808025, | |
| "grad_norm": 20.57366180419922, | |
| "learning_rate": 1.869440619293672e-05, | |
| "logits/chosen": 0.015002071857452393, | |
| "logits/rejected": -0.4523535668849945, | |
| "logps/chosen": -215.18704223632812, | |
| "logps/rejected": -179.958984375, | |
| "loss": 0.6336, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.9286333918571472, | |
| "rewards/margins": 0.4492555558681488, | |
| "rewards/rejected": 0.4793778359889984, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.25570416994492523, | |
| "grad_norm": 24.69734001159668, | |
| "learning_rate": 1.8625692983585976e-05, | |
| "logits/chosen": -0.3278903663158417, | |
| "logits/rejected": -0.9296085238456726, | |
| "logps/chosen": -212.3651580810547, | |
| "logps/rejected": -168.00753784179688, | |
| "loss": 0.6633, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.7741891741752625, | |
| "rewards/margins": 0.3930490016937256, | |
| "rewards/rejected": 0.38114017248153687, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.25963808025177026, | |
| "grad_norm": 27.854631423950195, | |
| "learning_rate": 1.855535072408516e-05, | |
| "logits/chosen": -0.4728453755378723, | |
| "logits/rejected": -0.6778625249862671, | |
| "logps/chosen": -211.9385528564453, | |
| "logps/rejected": -193.85667419433594, | |
| "loss": 0.6953, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.9169828295707703, | |
| "rewards/margins": 0.32869625091552734, | |
| "rewards/rejected": 0.5882865786552429, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.26357199055861524, | |
| "grad_norm": 18.423259735107422, | |
| "learning_rate": 1.8483392699287858e-05, | |
| "logits/chosen": -0.05396045371890068, | |
| "logits/rejected": -0.5624040365219116, | |
| "logps/chosen": -222.1643524169922, | |
| "logps/rejected": -177.35289001464844, | |
| "loss": 0.6206, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 1.1117911338806152, | |
| "rewards/margins": 0.5459399223327637, | |
| "rewards/rejected": 0.5658511519432068, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.2675059008654603, | |
| "grad_norm": 23.744850158691406, | |
| "learning_rate": 1.840983249920143e-05, | |
| "logits/chosen": -0.3244122564792633, | |
| "logits/rejected": -0.5297374725341797, | |
| "logps/chosen": -196.14691162109375, | |
| "logps/rejected": -188.9138946533203, | |
| "loss": 0.7056, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.8795296549797058, | |
| "rewards/margins": 0.3909408748149872, | |
| "rewards/rejected": 0.488588809967041, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.27143981117230526, | |
| "grad_norm": 18.513778686523438, | |
| "learning_rate": 1.8334684016420383e-05, | |
| "logits/chosen": -0.08137266337871552, | |
| "logits/rejected": -0.5458197593688965, | |
| "logps/chosen": -232.447509765625, | |
| "logps/rejected": -191.580078125, | |
| "loss": 0.6264, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.163153052330017, | |
| "rewards/margins": 0.4738085865974426, | |
| "rewards/rejected": 0.6893445253372192, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.2753737214791503, | |
| "grad_norm": 15.827184677124023, | |
| "learning_rate": 1.8257961443502626e-05, | |
| "logits/chosen": -0.30110448598861694, | |
| "logits/rejected": -0.6258831024169922, | |
| "logps/chosen": -190.89808654785156, | |
| "logps/rejected": -173.31884765625, | |
| "loss": 0.6519, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.946982204914093, | |
| "rewards/margins": 0.39443182945251465, | |
| "rewards/rejected": 0.5525503754615784, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.27930763178599527, | |
| "grad_norm": 19.0930118560791, | |
| "learning_rate": 1.8179679270289048e-05, | |
| "logits/chosen": -0.2574307322502136, | |
| "logits/rejected": -0.7561649680137634, | |
| "logps/chosen": -201.4808349609375, | |
| "logps/rejected": -172.31173706054688, | |
| "loss": 0.6453, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 1.1022285223007202, | |
| "rewards/margins": 0.5637288689613342, | |
| "rewards/rejected": 0.5384997129440308, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.2832415420928403, | |
| "grad_norm": 22.383216857910156, | |
| "learning_rate": 1.8099852281166974e-05, | |
| "logits/chosen": -0.2120940238237381, | |
| "logits/rejected": -0.7636501789093018, | |
| "logps/chosen": -209.04806518554688, | |
| "logps/rejected": -166.7012481689453, | |
| "loss": 0.6576, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.3941724300384521, | |
| "rewards/margins": 0.5536119341850281, | |
| "rewards/rejected": 0.8405605554580688, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2871754523996853, | |
| "grad_norm": 18.3509578704834, | |
| "learning_rate": 1.8018495552277987e-05, | |
| "logits/chosen": 0.07260416448116302, | |
| "logits/rejected": -0.2597780227661133, | |
| "logps/chosen": -208.8731689453125, | |
| "logps/rejected": -187.85023498535156, | |
| "loss": 0.6275, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 1.4085180759429932, | |
| "rewards/margins": 0.5379746556282043, | |
| "rewards/rejected": 0.8705434799194336, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.2911093627065303, | |
| "grad_norm": 21.863872528076172, | |
| "learning_rate": 1.7935624448670625e-05, | |
| "logits/chosen": -0.4248635172843933, | |
| "logits/rejected": -0.4336097836494446, | |
| "logps/chosen": -179.680908203125, | |
| "logps/rejected": -173.14013671875, | |
| "loss": 0.75, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 1.0196665525436401, | |
| "rewards/margins": 0.20690293610095978, | |
| "rewards/rejected": 0.8127636909484863, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.2950432730133753, | |
| "grad_norm": 26.93684196472168, | |
| "learning_rate": 1.785125462139855e-05, | |
| "logits/chosen": -0.16947659850120544, | |
| "logits/rejected": -0.451927125453949, | |
| "logps/chosen": -198.48106384277344, | |
| "logps/rejected": -174.99111938476562, | |
| "loss": 0.7696, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 1.1577861309051514, | |
| "rewards/margins": 0.21412566304206848, | |
| "rewards/rejected": 0.9436607360839844, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.2989771833202203, | |
| "grad_norm": 15.670443534851074, | |
| "learning_rate": 1.7765402004564687e-05, | |
| "logits/chosen": -0.1878432035446167, | |
| "logits/rejected": -0.5365083813667297, | |
| "logps/chosen": -204.27255249023438, | |
| "logps/rejected": -175.6739959716797, | |
| "loss": 0.6793, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 1.1427654027938843, | |
| "rewards/margins": 0.44054698944091797, | |
| "rewards/rejected": 0.7022184133529663, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.3029110936270653, | |
| "grad_norm": 20.738510131835938, | |
| "learning_rate": 1.76780828123119e-05, | |
| "logits/chosen": -0.22227105498313904, | |
| "logits/rejected": -0.4939172863960266, | |
| "logps/chosen": -204.56930541992188, | |
| "logps/rejected": -187.81863403320312, | |
| "loss": 0.6359, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.086004376411438, | |
| "rewards/margins": 0.5049671530723572, | |
| "rewards/rejected": 0.5810372233390808, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.3068450039339103, | |
| "grad_norm": 15.985719680786133, | |
| "learning_rate": 1.7589313535760787e-05, | |
| "logits/chosen": -0.33505499362945557, | |
| "logits/rejected": -0.5057377219200134, | |
| "logps/chosen": -203.09201049804688, | |
| "logps/rejected": -186.1582489013672, | |
| "loss": 0.728, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.9475752115249634, | |
| "rewards/margins": 0.21062707901000977, | |
| "rewards/rejected": 0.7369481325149536, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.3107789142407553, | |
| "grad_norm": 15.00536823272705, | |
| "learning_rate": 1.7499110939895162e-05, | |
| "logits/chosen": -0.2682803273200989, | |
| "logits/rejected": -0.6644273400306702, | |
| "logps/chosen": -197.18655395507812, | |
| "logps/rejected": -184.64974975585938, | |
| "loss": 0.7331, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.4725784361362457, | |
| "rewards/margins": 0.17313337326049805, | |
| "rewards/rejected": 0.29944509267807007, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.3147128245476003, | |
| "grad_norm": 18.541942596435547, | |
| "learning_rate": 1.7407492060395835e-05, | |
| "logits/chosen": -0.3485754132270813, | |
| "logits/rejected": -0.6408174633979797, | |
| "logps/chosen": -196.4596710205078, | |
| "logps/rejected": -178.34701538085938, | |
| "loss": 0.6891, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.33864206075668335, | |
| "rewards/margins": 0.22059743106365204, | |
| "rewards/rejected": 0.11804463714361191, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3147128245476003, | |
| "eval_logits/chosen": 1.2971076965332031, | |
| "eval_logits/rejected": 1.0804717540740967, | |
| "eval_logps/chosen": -207.33456420898438, | |
| "eval_logps/rejected": -180.31930541992188, | |
| "eval_loss": 0.7093836069107056, | |
| "eval_rewards/accuracies": 0.598437488079071, | |
| "eval_rewards/chosen": 0.2542892098426819, | |
| "eval_rewards/margins": 0.22887463867664337, | |
| "eval_rewards/rejected": 0.025414561852812767, | |
| "eval_runtime": 301.2073, | |
| "eval_samples_per_second": 2.125, | |
| "eval_steps_per_second": 0.133, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.31864673485444533, | |
| "grad_norm": 22.79604148864746, | |
| "learning_rate": 1.731447420042321e-05, | |
| "logits/chosen": -0.33927303552627563, | |
| "logits/rejected": -0.5682342052459717, | |
| "logps/chosen": -190.31930541992188, | |
| "logps/rejected": -173.07032775878906, | |
| "loss": 0.7979, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.0728757381439209, | |
| "rewards/margins": 0.02971130609512329, | |
| "rewards/rejected": 0.0431644432246685, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.3225806451612903, | |
| "grad_norm": 22.005783081054688, | |
| "learning_rate": 1.7220074927349452e-05, | |
| "logits/chosen": -0.3349539339542389, | |
| "logits/rejected": -0.6785364151000977, | |
| "logps/chosen": -205.6999969482422, | |
| "logps/rejected": -174.34982299804688, | |
| "loss": 0.6723, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.04839733988046646, | |
| "rewards/margins": 0.2823019027709961, | |
| "rewards/rejected": -0.23390455543994904, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.32651455546813535, | |
| "grad_norm": 18.50445556640625, | |
| "learning_rate": 1.712431206944067e-05, | |
| "logits/chosen": -0.31676384806632996, | |
| "logits/rejected": -0.47476306557655334, | |
| "logps/chosen": -194.7633056640625, | |
| "logps/rejected": -185.64987182617188, | |
| "loss": 0.6637, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.2732119560241699, | |
| "rewards/margins": 0.3499985337257385, | |
| "rewards/rejected": -0.07678655534982681, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.3304484657749803, | |
| "grad_norm": 21.16750144958496, | |
| "learning_rate": 1.7027203712489902e-05, | |
| "logits/chosen": -0.22730335593223572, | |
| "logits/rejected": -0.6324140429496765, | |
| "logps/chosen": -209.23678588867188, | |
| "logps/rejected": -177.7320098876953, | |
| "loss": 0.7066, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.41964513063430786, | |
| "rewards/margins": 0.263131707906723, | |
| "rewards/rejected": 0.15651337802410126, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.33438237608182536, | |
| "grad_norm": 21.21584129333496, | |
| "learning_rate": 1.6928768196401403e-05, | |
| "logits/chosen": -0.19787462055683136, | |
| "logits/rejected": -0.5100497007369995, | |
| "logps/chosen": -213.1494140625, | |
| "logps/rejected": -194.2113800048828, | |
| "loss": 0.7113, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.434047132730484, | |
| "rewards/margins": 0.20316064357757568, | |
| "rewards/rejected": 0.2308865338563919, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.33831628638867034, | |
| "grad_norm": 26.320444107055664, | |
| "learning_rate": 1.682902411172698e-05, | |
| "logits/chosen": -0.27940934896469116, | |
| "logits/rejected": -0.6819210052490234, | |
| "logps/chosen": -191.19189453125, | |
| "logps/rejected": -160.06234741210938, | |
| "loss": 0.672, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.6531416177749634, | |
| "rewards/margins": 0.32751747965812683, | |
| "rewards/rejected": 0.32562416791915894, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.3422501966955153, | |
| "grad_norm": 16.507688522338867, | |
| "learning_rate": 1.6727990296154962e-05, | |
| "logits/chosen": -0.43093472719192505, | |
| "logits/rejected": -0.6659766435623169, | |
| "logps/chosen": -194.37916564941406, | |
| "logps/rejected": -175.87298583984375, | |
| "loss": 0.6782, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.9038249254226685, | |
| "rewards/margins": 0.3305993974208832, | |
| "rewards/rejected": 0.5732254385948181, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.34618410700236035, | |
| "grad_norm": 15.00309944152832, | |
| "learning_rate": 1.6625685830952533e-05, | |
| "logits/chosen": -0.017139725387096405, | |
| "logits/rejected": -0.5116509199142456, | |
| "logps/chosen": -203.77554321289062, | |
| "logps/rejected": -166.87571716308594, | |
| "loss": 0.6715, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.8991168141365051, | |
| "rewards/margins": 0.4240299165248871, | |
| "rewards/rejected": 0.47508686780929565, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.35011801730920533, | |
| "grad_norm": 22.238525390625, | |
| "learning_rate": 1.6522130037362018e-05, | |
| "logits/chosen": -0.4809524416923523, | |
| "logits/rejected": -0.77618408203125, | |
| "logps/chosen": -183.9463348388672, | |
| "logps/rejected": -168.94070434570312, | |
| "loss": 0.7005, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.9337043762207031, | |
| "rewards/margins": 0.2749274969100952, | |
| "rewards/rejected": 0.6587768197059631, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.35405192761605037, | |
| "grad_norm": 17.745378494262695, | |
| "learning_rate": 1.641734247295189e-05, | |
| "logits/chosen": -0.4837673306465149, | |
| "logits/rejected": -0.8133207559585571, | |
| "logps/chosen": -187.5880126953125, | |
| "logps/rejected": -172.59933471679688, | |
| "loss": 0.6777, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.9450648427009583, | |
| "rewards/margins": 0.3392513394355774, | |
| "rewards/rejected": 0.6058135032653809, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.35798583792289534, | |
| "grad_norm": 21.806243896484375, | |
| "learning_rate": 1.63113429279231e-05, | |
| "logits/chosen": -0.3670351207256317, | |
| "logits/rejected": -0.7418017387390137, | |
| "logps/chosen": -221.2038116455078, | |
| "logps/rejected": -184.3399200439453, | |
| "loss": 0.7212, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.8858639001846313, | |
| "rewards/margins": 0.2686173915863037, | |
| "rewards/rejected": 0.6172465085983276, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.3619197482297404, | |
| "grad_norm": 19.19058609008789, | |
| "learning_rate": 1.6204151421371504e-05, | |
| "logits/chosen": -0.5260201692581177, | |
| "logits/rejected": -0.887170672416687, | |
| "logps/chosen": -198.56930541992188, | |
| "logps/rejected": -170.34158325195312, | |
| "loss": 0.6642, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.6595619320869446, | |
| "rewards/margins": 0.25892138481140137, | |
| "rewards/rejected": 0.4006405472755432, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.36585365853658536, | |
| "grad_norm": 16.740882873535156, | |
| "learning_rate": 1.609578819750708e-05, | |
| "logits/chosen": -0.21146011352539062, | |
| "logits/rejected": -0.41337770223617554, | |
| "logps/chosen": -186.92779541015625, | |
| "logps/rejected": -183.7529754638672, | |
| "loss": 0.6911, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.568415641784668, | |
| "rewards/margins": 0.27034991979599, | |
| "rewards/rejected": 0.298065721988678, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.3697875688434304, | |
| "grad_norm": 22.620988845825195, | |
| "learning_rate": 1.5986273721830557e-05, | |
| "logits/chosen": -0.17011170089244843, | |
| "logits/rejected": -0.5642642974853516, | |
| "logps/chosen": -206.16073608398438, | |
| "logps/rejected": -187.0243377685547, | |
| "loss": 0.73, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.44800883531570435, | |
| "rewards/margins": 0.19431404769420624, | |
| "rewards/rejected": 0.2536947727203369, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.37372147915027537, | |
| "grad_norm": 19.39198112487793, | |
| "learning_rate": 1.587562867726832e-05, | |
| "logits/chosen": -0.18244773149490356, | |
| "logits/rejected": -0.5230101346969604, | |
| "logps/chosen": -223.02371215820312, | |
| "logps/rejected": -198.8177032470703, | |
| "loss": 0.6721, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.4324628710746765, | |
| "rewards/margins": 0.22442837059497833, | |
| "rewards/rejected": 0.20803451538085938, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.3776553894571204, | |
| "grad_norm": 19.32149314880371, | |
| "learning_rate": 1.5763873960266236e-05, | |
| "logits/chosen": -0.29324209690093994, | |
| "logits/rejected": -0.5279776453971863, | |
| "logps/chosen": -206.15469360351562, | |
| "logps/rejected": -188.80137634277344, | |
| "loss": 0.6942, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.4868395924568176, | |
| "rewards/margins": 0.33774086833000183, | |
| "rewards/rejected": 0.14909867942333221, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.3815892997639654, | |
| "grad_norm": 19.483469009399414, | |
| "learning_rate": 1.5673685398812467e-05, | |
| "logits/chosen": -0.1828387826681137, | |
| "logits/rejected": -0.41064882278442383, | |
| "logps/chosen": -217.49295043945312, | |
| "logps/rejected": -198.88177490234375, | |
| "loss": 0.7507, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": 1.0268093347549438, | |
| "rewards/margins": 0.21613208949565887, | |
| "rewards/rejected": 0.810677170753479, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.38552321007081036, | |
| "grad_norm": 28.394817352294922, | |
| "learning_rate": 1.555998659687541e-05, | |
| "logits/chosen": -0.49702200293540955, | |
| "logits/rejected": -1.0014259815216064, | |
| "logps/chosen": -197.88128662109375, | |
| "logps/rejected": -160.67999267578125, | |
| "loss": 0.6519, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.9069635272026062, | |
| "rewards/margins": 0.40647339820861816, | |
| "rewards/rejected": 0.5004900693893433, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.3894571203776554, | |
| "grad_norm": 20.914031982421875, | |
| "learning_rate": 1.544523773472669e-05, | |
| "logits/chosen": 0.02130720391869545, | |
| "logits/rejected": -0.4486933648586273, | |
| "logps/chosen": -211.362060546875, | |
| "logps/rejected": -175.72430419921875, | |
| "loss": 0.685, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.1584622859954834, | |
| "rewards/margins": 0.3790398836135864, | |
| "rewards/rejected": 0.7794222831726074, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.3933910306845004, | |
| "grad_norm": 14.320610046386719, | |
| "learning_rate": 1.532946048386001e-05, | |
| "logits/chosen": -0.010864943265914917, | |
| "logits/rejected": -0.5150319337844849, | |
| "logps/chosen": -207.92333984375, | |
| "logps/rejected": -178.11700439453125, | |
| "loss": 0.6224, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.1484225988388062, | |
| "rewards/margins": 0.4593985676765442, | |
| "rewards/rejected": 0.6890240907669067, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3933910306845004, | |
| "eval_logits/chosen": 1.327344536781311, | |
| "eval_logits/rejected": 1.1055529117584229, | |
| "eval_logps/chosen": -205.45755004882812, | |
| "eval_logps/rejected": -178.61904907226562, | |
| "eval_loss": 0.7026852369308472, | |
| "eval_rewards/accuracies": 0.620312511920929, | |
| "eval_rewards/chosen": 1.0051077604293823, | |
| "eval_rewards/margins": 0.29959002137184143, | |
| "eval_rewards/rejected": 0.7055177688598633, | |
| "eval_runtime": 297.7987, | |
| "eval_samples_per_second": 2.149, | |
| "eval_steps_per_second": 0.134, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3973249409913454, | |
| "grad_norm": 17.606443405151367, | |
| "learning_rate": 1.5212676709990762e-05, | |
| "logits/chosen": 0.12024303525686264, | |
| "logits/rejected": -0.33552008867263794, | |
| "logps/chosen": -205.59109497070312, | |
| "logps/rejected": -181.02566528320312, | |
| "loss": 0.6522, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.9811790585517883, | |
| "rewards/margins": 0.43993645906448364, | |
| "rewards/rejected": 0.5412425994873047, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.4012588512981904, | |
| "grad_norm": 23.3114070892334, | |
| "learning_rate": 1.509490846892649e-05, | |
| "logits/chosen": 0.01656034216284752, | |
| "logits/rejected": -0.5744299292564392, | |
| "logps/chosen": -211.2788543701172, | |
| "logps/rejected": -167.57276916503906, | |
| "loss": 0.6138, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.8017475008964539, | |
| "rewards/margins": 0.5002428293228149, | |
| "rewards/rejected": 0.3015046715736389, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.4051927616050354, | |
| "grad_norm": 14.10328197479248, | |
| "learning_rate": 1.4976178002401408e-05, | |
| "logits/chosen": -0.3282383978366852, | |
| "logits/rejected": -0.48758015036582947, | |
| "logps/chosen": -200.8679962158203, | |
| "logps/rejected": -179.44241333007812, | |
| "loss": 0.6479, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.6766945719718933, | |
| "rewards/margins": 0.3457737863063812, | |
| "rewards/rejected": 0.33092084527015686, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.4091266719118804, | |
| "grad_norm": 26.593978881835938, | |
| "learning_rate": 1.4856507733875837e-05, | |
| "logits/chosen": -0.1160442978143692, | |
| "logits/rejected": -0.4207191467285156, | |
| "logps/chosen": -190.7376708984375, | |
| "logps/rejected": -169.13816833496094, | |
| "loss": 0.7379, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.7750043869018555, | |
| "rewards/margins": 0.34026703238487244, | |
| "rewards/rejected": 0.43473726511001587, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.41306058221872544, | |
| "grad_norm": 17.67402458190918, | |
| "learning_rate": 1.4735920264301288e-05, | |
| "logits/chosen": -0.17023354768753052, | |
| "logits/rejected": -0.5197206735610962, | |
| "logps/chosen": -207.9748077392578, | |
| "logps/rejected": -182.002197265625, | |
| "loss": 0.7135, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.5062464475631714, | |
| "rewards/margins": 0.19488921761512756, | |
| "rewards/rejected": 0.31135720014572144, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.4169944925255704, | |
| "grad_norm": 16.364791870117188, | |
| "learning_rate": 1.4614438367852056e-05, | |
| "logits/chosen": -0.35339441895484924, | |
| "logits/rejected": -0.6959262490272522, | |
| "logps/chosen": -202.8052215576172, | |
| "logps/rejected": -167.2289276123047, | |
| "loss": 0.6573, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.89134281873703, | |
| "rewards/margins": 0.39820951223373413, | |
| "rewards/rejected": 0.4931332468986511, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.4209284028324154, | |
| "grad_norm": 19.59364891052246, | |
| "learning_rate": 1.4492084987624071e-05, | |
| "logits/chosen": -0.1122426763176918, | |
| "logits/rejected": -0.44985610246658325, | |
| "logps/chosen": -204.77981567382812, | |
| "logps/rejected": -181.18716430664062, | |
| "loss": 0.6709, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.9437880516052246, | |
| "rewards/margins": 0.4574647545814514, | |
| "rewards/rejected": 0.48632335662841797, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.42486231313926043, | |
| "grad_norm": 17.59402084350586, | |
| "learning_rate": 1.4368883231301885e-05, | |
| "logits/chosen": -0.17638197541236877, | |
| "logits/rejected": -0.5632339715957642, | |
| "logps/chosen": -201.26885986328125, | |
| "logps/rejected": -170.08328247070312, | |
| "loss": 0.6228, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.184136986732483, | |
| "rewards/margins": 0.756480872631073, | |
| "rewards/rejected": 0.42765602469444275, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.4287962234461054, | |
| "grad_norm": 27.206796646118164, | |
| "learning_rate": 1.4244856366794517e-05, | |
| "logits/chosen": -0.057549990713596344, | |
| "logits/rejected": -0.4487794041633606, | |
| "logps/chosen": -205.1177215576172, | |
| "logps/rejected": -177.13014221191406, | |
| "loss": 0.6294, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.0669742822647095, | |
| "rewards/margins": 0.5120534896850586, | |
| "rewards/rejected": 0.5549208521842957, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.43273013375295044, | |
| "grad_norm": 16.399995803833008, | |
| "learning_rate": 1.4120027817841098e-05, | |
| "logits/chosen": -0.133390873670578, | |
| "logits/rejected": -0.47696390748023987, | |
| "logps/chosen": -214.5057373046875, | |
| "logps/rejected": -193.0947265625, | |
| "loss": 0.808, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.8214758038520813, | |
| "rewards/margins": 0.04125159978866577, | |
| "rewards/rejected": 0.7802242040634155, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.4366640440597954, | |
| "grad_norm": 18.979785919189453, | |
| "learning_rate": 1.399442115958704e-05, | |
| "logits/chosen": -0.569675862789154, | |
| "logits/rejected": -0.8924716711044312, | |
| "logps/chosen": -211.4713897705078, | |
| "logps/rejected": -183.01220703125, | |
| "loss": 0.6587, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.8996235132217407, | |
| "rewards/margins": 0.45010414719581604, | |
| "rewards/rejected": 0.4495193362236023, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.44059795436664045, | |
| "grad_norm": 21.638757705688477, | |
| "learning_rate": 1.3868060114131644e-05, | |
| "logits/chosen": -0.22702725231647491, | |
| "logits/rejected": -0.5234431028366089, | |
| "logps/chosen": -210.87393188476562, | |
| "logps/rejected": -195.6029052734375, | |
| "loss": 0.738, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 1.0586285591125488, | |
| "rewards/margins": 0.27768781781196594, | |
| "rewards/rejected": 0.7809406518936157, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.44453186467348543, | |
| "grad_norm": 23.013927459716797, | |
| "learning_rate": 1.3740968546047935e-05, | |
| "logits/chosen": -0.17697608470916748, | |
| "logits/rejected": -0.4483562409877777, | |
| "logps/chosen": -211.2060089111328, | |
| "logps/rejected": -197.86001586914062, | |
| "loss": 0.7594, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.9211471676826477, | |
| "rewards/margins": 0.0961461290717125, | |
| "rewards/rejected": 0.825001060962677, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.44846577498033047, | |
| "grad_norm": 20.101484298706055, | |
| "learning_rate": 1.3613170457875579e-05, | |
| "logits/chosen": -0.22834663093090057, | |
| "logits/rejected": -0.6228377223014832, | |
| "logps/chosen": -207.5561065673828, | |
| "logps/rejected": -182.3037567138672, | |
| "loss": 0.6097, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": 1.1122691631317139, | |
| "rewards/margins": 0.5503975749015808, | |
| "rewards/rejected": 0.5618715882301331, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.45239968528717545, | |
| "grad_norm": 26.358943939208984, | |
| "learning_rate": 1.348468998558779e-05, | |
| "logits/chosen": -0.13707995414733887, | |
| "logits/rejected": -0.44805946946144104, | |
| "logps/chosen": -220.7776641845703, | |
| "logps/rejected": -201.1964874267578, | |
| "loss": 0.713, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.9859493374824524, | |
| "rewards/margins": 0.3383699953556061, | |
| "rewards/rejected": 0.6475793123245239, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.4563335955940205, | |
| "grad_norm": 16.33328628540039, | |
| "learning_rate": 1.3355551394032968e-05, | |
| "logits/chosen": -0.31562569737434387, | |
| "logits/rejected": -0.6708458065986633, | |
| "logps/chosen": -203.0553436279297, | |
| "logps/rejected": -176.8132781982422, | |
| "loss": 0.6889, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.7495515942573547, | |
| "rewards/margins": 0.3594974875450134, | |
| "rewards/rejected": 0.39005404710769653, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.46026750590086546, | |
| "grad_norm": 29.162113189697266, | |
| "learning_rate": 1.3225779072352066e-05, | |
| "logits/chosen": -0.32384806871414185, | |
| "logits/rejected": -0.6729586124420166, | |
| "logps/chosen": -214.14102172851562, | |
| "logps/rejected": -184.0008087158203, | |
| "loss": 0.6698, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.8371032476425171, | |
| "rewards/margins": 0.3700554370880127, | |
| "rewards/rejected": 0.4670478403568268, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.46420141620771044, | |
| "grad_norm": 25.16128921508789, | |
| "learning_rate": 1.309539752937243e-05, | |
| "logits/chosen": -0.256720632314682, | |
| "logits/rejected": -0.4291699528694153, | |
| "logps/chosen": -191.2805938720703, | |
| "logps/rejected": -184.6292266845703, | |
| "loss": 0.6755, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.6577237248420715, | |
| "rewards/margins": 0.28180426359176636, | |
| "rewards/rejected": 0.3759194016456604, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.46813532651455547, | |
| "grad_norm": 20.09102439880371, | |
| "learning_rate": 1.2964431388979075e-05, | |
| "logits/chosen": -0.3570843040943146, | |
| "logits/rejected": -0.8670114278793335, | |
| "logps/chosen": -203.76992797851562, | |
| "logps/rejected": -163.80783081054688, | |
| "loss": 0.6412, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.7835728526115417, | |
| "rewards/margins": 0.5176677703857422, | |
| "rewards/rejected": 0.2659050524234772, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.47206923682140045, | |
| "grad_norm": 22.330236434936523, | |
| "learning_rate": 1.2832905385464193e-05, | |
| "logits/chosen": -0.3153493404388428, | |
| "logits/rejected": -0.6954606771469116, | |
| "logps/chosen": -199.0489501953125, | |
| "logps/rejected": -172.42919921875, | |
| "loss": 0.6764, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.7396122217178345, | |
| "rewards/margins": 0.3455941677093506, | |
| "rewards/rejected": 0.39401811361312866, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.47206923682140045, | |
| "eval_logits/chosen": 1.3154770135879517, | |
| "eval_logits/rejected": 1.0959367752075195, | |
| "eval_logps/chosen": -205.95361328125, | |
| "eval_logps/rejected": -179.14404296875, | |
| "eval_loss": 0.688846230506897, | |
| "eval_rewards/accuracies": 0.6234375238418579, | |
| "eval_rewards/chosen": 0.8066827058792114, | |
| "eval_rewards/margins": 0.3111591935157776, | |
| "eval_rewards/rejected": 0.49552351236343384, | |
| "eval_runtime": 282.0013, | |
| "eval_samples_per_second": 2.269, | |
| "eval_steps_per_second": 0.142, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4760031471282455, | |
| "grad_norm": 13.301490783691406, | |
| "learning_rate": 1.2700844358855853e-05, | |
| "logits/chosen": -0.2941150367259979, | |
| "logits/rejected": -0.7340162992477417, | |
| "logps/chosen": -194.4886932373047, | |
| "logps/rejected": -159.5877227783203, | |
| "loss": 0.6895, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.846507728099823, | |
| "rewards/margins": 0.3602963089942932, | |
| "rewards/rejected": 0.48621147871017456, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.47993705743509046, | |
| "grad_norm": 19.667444229125977, | |
| "learning_rate": 1.2568273250226681e-05, | |
| "logits/chosen": -0.2455168217420578, | |
| "logits/rejected": -0.608180820941925, | |
| "logps/chosen": -225.4668426513672, | |
| "logps/rejected": -192.55905151367188, | |
| "loss": 0.6672, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 1.017348289489746, | |
| "rewards/margins": 0.38524192571640015, | |
| "rewards/rejected": 0.632106363773346, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.4838709677419355, | |
| "grad_norm": 24.933828353881836, | |
| "learning_rate": 1.243521709698351e-05, | |
| "logits/chosen": -0.28044039011001587, | |
| "logits/rejected": -0.5124521255493164, | |
| "logps/chosen": -199.1013641357422, | |
| "logps/rejected": -195.05728149414062, | |
| "loss": 0.6967, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.9729631543159485, | |
| "rewards/margins": 0.31783193349838257, | |
| "rewards/rejected": 0.6551311016082764, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 21.9912109375, | |
| "learning_rate": 1.230170102813879e-05, | |
| "logits/chosen": -0.6046349406242371, | |
| "logits/rejected": -0.8912727236747742, | |
| "logps/chosen": -193.95303344726562, | |
| "logps/rejected": -169.863037109375, | |
| "loss": 0.6994, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.9892646670341492, | |
| "rewards/margins": 0.35485339164733887, | |
| "rewards/rejected": 0.6344112753868103, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.4917387883556255, | |
| "grad_norm": 14.393425941467285, | |
| "learning_rate": 1.2167750259564733e-05, | |
| "logits/chosen": -0.21057292819023132, | |
| "logits/rejected": -0.6453763246536255, | |
| "logps/chosen": -197.05722045898438, | |
| "logps/rejected": -194.5146942138672, | |
| "loss": 0.6655, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.8438342809677124, | |
| "rewards/margins": 0.3562160134315491, | |
| "rewards/rejected": 0.4876182973384857, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.4956726986624705, | |
| "grad_norm": 27.751855850219727, | |
| "learning_rate": 1.203339008923103e-05, | |
| "logits/chosen": -0.08632899820804596, | |
| "logits/rejected": -0.5858111381530762, | |
| "logps/chosen": -210.37890625, | |
| "logps/rejected": -181.04751586914062, | |
| "loss": 0.7106, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.0096272230148315, | |
| "rewards/margins": 0.4222971796989441, | |
| "rewards/rejected": 0.587330162525177, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.4996066089693155, | |
| "grad_norm": 21.017240524291992, | |
| "learning_rate": 1.1898645892427064e-05, | |
| "logits/chosen": -0.48605161905288696, | |
| "logits/rejected": -0.6945669651031494, | |
| "logps/chosen": -182.28805541992188, | |
| "logps/rejected": -169.93661499023438, | |
| "loss": 0.7755, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.5226560831069946, | |
| "rewards/margins": 0.05550839379429817, | |
| "rewards/rejected": 0.46714773774147034, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.5035405192761605, | |
| "grad_norm": 20.2221622467041, | |
| "learning_rate": 1.1763543116969549e-05, | |
| "logits/chosen": -0.10474424064159393, | |
| "logits/rejected": -0.5913185477256775, | |
| "logps/chosen": -209.303466796875, | |
| "logps/rejected": -173.1480255126953, | |
| "loss": 0.6692, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.6786335706710815, | |
| "rewards/margins": 0.3942939341068268, | |
| "rewards/rejected": 0.28433966636657715, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.5074744295830055, | |
| "grad_norm": 15.26221752166748, | |
| "learning_rate": 1.1628107278396432e-05, | |
| "logits/chosen": -0.06124790757894516, | |
| "logits/rejected": -0.3360343873500824, | |
| "logps/chosen": -202.93270874023438, | |
| "logps/rejected": -184.75259399414062, | |
| "loss": 0.6547, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.39857378602027893, | |
| "rewards/margins": 0.2742787301540375, | |
| "rewards/rejected": 0.12429501861333847, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.5114083398898505, | |
| "grad_norm": 18.45632553100586, | |
| "learning_rate": 1.1492363955148023e-05, | |
| "logits/chosen": -0.1759663075208664, | |
| "logits/rejected": -0.6530739665031433, | |
| "logps/chosen": -218.36123657226562, | |
| "logps/rejected": -199.7471160888672, | |
| "loss": 0.653, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.5292393565177917, | |
| "rewards/margins": 0.3620988726615906, | |
| "rewards/rejected": 0.16714049875736237, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.5153422501966956, | |
| "grad_norm": 16.891386032104492, | |
| "learning_rate": 1.1356338783736256e-05, | |
| "logits/chosen": -0.4392605721950531, | |
| "logits/rejected": -0.7525895237922668, | |
| "logps/chosen": -194.24301147460938, | |
| "logps/rejected": -182.4429473876953, | |
| "loss": 0.6259, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.4986444115638733, | |
| "rewards/margins": 0.49716418981552124, | |
| "rewards/rejected": 0.0014802322257310152, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.5192761605035405, | |
| "grad_norm": 18.568416595458984, | |
| "learning_rate": 1.1220057453902973e-05, | |
| "logits/chosen": -0.2285362035036087, | |
| "logits/rejected": -0.6583995223045349, | |
| "logps/chosen": -219.6389617919922, | |
| "logps/rejected": -176.62965393066406, | |
| "loss": 0.6604, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.6904179453849792, | |
| "rewards/margins": 0.3659079670906067, | |
| "rewards/rejected": 0.32451000809669495, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5232100708103855, | |
| "grad_norm": 16.81451416015625, | |
| "learning_rate": 1.1083545703768137e-05, | |
| "logits/chosen": -0.3168891370296478, | |
| "logits/rejected": -0.5861741304397583, | |
| "logps/chosen": -198.4099578857422, | |
| "logps/rejected": -181.83871459960938, | |
| "loss": 0.736, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.6643240451812744, | |
| "rewards/margins": 0.17423763871192932, | |
| "rewards/rejected": 0.4900864064693451, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.5271439811172305, | |
| "grad_norm": 20.030567169189453, | |
| "learning_rate": 1.0946829314968936e-05, | |
| "logits/chosen": -0.22313520312309265, | |
| "logits/rejected": -0.6608983874320984, | |
| "logps/chosen": -206.3205108642578, | |
| "logps/rejected": -178.14974975585938, | |
| "loss": 0.6314, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.784034252166748, | |
| "rewards/margins": 0.45540714263916016, | |
| "rewards/rejected": 0.3286270797252655, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5310778914240756, | |
| "grad_norm": 12.727190017700195, | |
| "learning_rate": 1.0809934107790675e-05, | |
| "logits/chosen": -0.1376127302646637, | |
| "logits/rejected": -0.5582663416862488, | |
| "logps/chosen": -207.121337890625, | |
| "logps/rejected": -189.23037719726562, | |
| "loss": 0.5616, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 1.1340868473052979, | |
| "rewards/margins": 0.6862513422966003, | |
| "rewards/rejected": 0.4478355050086975, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.5350118017309206, | |
| "grad_norm": 15.704160690307617, | |
| "learning_rate": 1.0672885936290316e-05, | |
| "logits/chosen": -0.11958789825439453, | |
| "logits/rejected": -0.41796404123306274, | |
| "logps/chosen": -200.3405303955078, | |
| "logps/rejected": -185.74917602539062, | |
| "loss": 0.7025, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 1.0963573455810547, | |
| "rewards/margins": 0.3328610956668854, | |
| "rewards/rejected": 0.7634962797164917, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.5389457120377655, | |
| "grad_norm": 16.583145141601562, | |
| "learning_rate": 1.05357106834137e-05, | |
| "logits/chosen": -0.035154812037944794, | |
| "logits/rejected": -0.6018010377883911, | |
| "logps/chosen": -214.5799102783203, | |
| "logps/rejected": -181.4016571044922, | |
| "loss": 0.6904, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.9387739300727844, | |
| "rewards/margins": 0.34907636046409607, | |
| "rewards/rejected": 0.5896975994110107, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.5428796223446105, | |
| "grad_norm": 15.397040367126465, | |
| "learning_rate": 1.0398434256107291e-05, | |
| "logits/chosen": -0.3040166199207306, | |
| "logits/rejected": -0.6104984283447266, | |
| "logps/chosen": -190.73818969726562, | |
| "logps/rejected": -172.9613037109375, | |
| "loss": 0.6723, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.8516994714736938, | |
| "rewards/margins": 0.3647121787071228, | |
| "rewards/rejected": 0.48698729276657104, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.5468135326514555, | |
| "grad_norm": 17.214340209960938, | |
| "learning_rate": 1.0261082580425366e-05, | |
| "logits/chosen": -0.25491005182266235, | |
| "logits/rejected": -0.7748223543167114, | |
| "logps/chosen": -205.028564453125, | |
| "logps/rejected": -169.1365966796875, | |
| "loss": 0.6359, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.8545015454292297, | |
| "rewards/margins": 0.4432094693183899, | |
| "rewards/rejected": 0.41129201650619507, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.5507474429583006, | |
| "grad_norm": 18.72207260131836, | |
| "learning_rate": 1.012368159663363e-05, | |
| "logits/chosen": -0.43465644121170044, | |
| "logits/rejected": -0.6075267195701599, | |
| "logps/chosen": -198.85336303710938, | |
| "logps/rejected": -185.84034729003906, | |
| "loss": 0.6205, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.8257676959037781, | |
| "rewards/margins": 0.442889541387558, | |
| "rewards/rejected": 0.38287803530693054, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5507474429583006, | |
| "eval_logits/chosen": 1.289400339126587, | |
| "eval_logits/rejected": 1.06741201877594, | |
| "eval_logps/chosen": -206.27685546875, | |
| "eval_logps/rejected": -179.56541442871094, | |
| "eval_loss": 0.6758726835250854, | |
| "eval_rewards/accuracies": 0.6343749761581421, | |
| "eval_rewards/chosen": 0.6773768067359924, | |
| "eval_rewards/margins": 0.3504090905189514, | |
| "eval_rewards/rejected": 0.32696765661239624, | |
| "eval_runtime": 264.1292, | |
| "eval_samples_per_second": 2.423, | |
| "eval_steps_per_second": 0.151, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5546813532651456, | |
| "grad_norm": 20.8519344329834, | |
| "learning_rate": 9.98625725431013e-06, | |
| "logits/chosen": -0.020856428891420364, | |
| "logits/rejected": -0.20043806731700897, | |
| "logps/chosen": -193.96920776367188, | |
| "logps/rejected": -172.1241912841797, | |
| "loss": 0.7039, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.5150532722473145, | |
| "rewards/margins": 0.1648593544960022, | |
| "rewards/rejected": 0.35019388794898987, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.5586152635719905, | |
| "grad_norm": 18.23834800720215, | |
| "learning_rate": 9.848835507444405e-06, | |
| "logits/chosen": -0.17138266563415527, | |
| "logits/rejected": -0.5400444269180298, | |
| "logps/chosen": -213.20947265625, | |
| "logps/rejected": -179.41683959960938, | |
| "loss": 0.5993, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.878060519695282, | |
| "rewards/margins": 0.5326789617538452, | |
| "rewards/rejected": 0.34538155794143677, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.5625491738788355, | |
| "grad_norm": 17.19778060913086, | |
| "learning_rate": 9.71144230953582e-06, | |
| "logits/chosen": -0.15033751726150513, | |
| "logits/rejected": -0.6573851108551025, | |
| "logps/chosen": -209.91763305664062, | |
| "logps/rejected": -173.20547485351562, | |
| "loss": 0.637, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.7313550710678101, | |
| "rewards/margins": 0.45394793152809143, | |
| "rewards/rejected": 0.27740710973739624, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.5664830841856806, | |
| "grad_norm": 17.859058380126953, | |
| "learning_rate": 9.574103608691974e-06, | |
| "logits/chosen": -0.1018882766366005, | |
| "logits/rejected": -0.3827294111251831, | |
| "logps/chosen": -217.5899658203125, | |
| "logps/rejected": -190.86546325683594, | |
| "loss": 0.7034, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.7803667187690735, | |
| "rewards/margins": 0.14793583750724792, | |
| "rewards/rejected": 0.632430911064148, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.5704169944925256, | |
| "grad_norm": 17.891475677490234, | |
| "learning_rate": 9.436845342728142e-06, | |
| "logits/chosen": -0.23665161430835724, | |
| "logits/rejected": -0.6916168928146362, | |
| "logps/chosen": -198.93873596191406, | |
| "logps/rejected": -166.03292846679688, | |
| "loss": 0.6421, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.801128089427948, | |
| "rewards/margins": 0.4237571656703949, | |
| "rewards/rejected": 0.3773708939552307, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.5743509047993706, | |
| "grad_norm": 17.744354248046875, | |
| "learning_rate": 9.299693434268653e-06, | |
| "logits/chosen": -0.01328353863209486, | |
| "logits/rejected": -0.2819923758506775, | |
| "logps/chosen": -207.9522705078125, | |
| "logps/rejected": -188.49993896484375, | |
| "loss": 0.6878, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.904397189617157, | |
| "rewards/margins": 0.36915481090545654, | |
| "rewards/rejected": 0.5352423787117004, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.5782848151062155, | |
| "grad_norm": 18.68268394470215, | |
| "learning_rate": 9.162673785851131e-06, | |
| "logits/chosen": -0.39516356587409973, | |
| "logits/rejected": -0.7670010328292847, | |
| "logps/chosen": -204.0966796875, | |
| "logps/rejected": -170.11227416992188, | |
| "loss": 0.6341, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.8424245715141296, | |
| "rewards/margins": 0.40797433257102966, | |
| "rewards/rejected": 0.43445029854774475, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.5822187254130606, | |
| "grad_norm": 14.530721664428711, | |
| "learning_rate": 9.025812275034541e-06, | |
| "logits/chosen": -0.14751622080802917, | |
| "logits/rejected": -0.5135005116462708, | |
| "logps/chosen": -225.6256866455078, | |
| "logps/rejected": -200.2797393798828, | |
| "loss": 0.621, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.029211401939392, | |
| "rewards/margins": 0.5424867868423462, | |
| "rewards/rejected": 0.48672476410865784, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.5861526357199056, | |
| "grad_norm": 18.743927001953125, | |
| "learning_rate": 8.889134749511956e-06, | |
| "logits/chosen": -0.11462094634771347, | |
| "logits/rejected": -0.38805294036865234, | |
| "logps/chosen": -207.6776123046875, | |
| "logps/rejected": -181.88101196289062, | |
| "loss": 0.7368, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.7982211709022522, | |
| "rewards/margins": 0.21776151657104492, | |
| "rewards/rejected": 0.580459713935852, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.5900865460267506, | |
| "grad_norm": 14.667529106140137, | |
| "learning_rate": 8.752667022228936e-06, | |
| "logits/chosen": -0.022926175966858864, | |
| "logits/rejected": -0.4718795418739319, | |
| "logps/chosen": -216.82284545898438, | |
| "logps/rejected": -186.5943603515625, | |
| "loss": 0.622, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.8725186586380005, | |
| "rewards/margins": 0.6078484058380127, | |
| "rewards/rejected": 0.264670193195343, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5940204563335956, | |
| "grad_norm": 20.248031616210938, | |
| "learning_rate": 8.616434866508519e-06, | |
| "logits/chosen": -0.15943610668182373, | |
| "logits/rejected": -0.6148089170455933, | |
| "logps/chosen": -209.1900177001953, | |
| "logps/rejected": -184.60047912597656, | |
| "loss": 0.6446, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.7891548871994019, | |
| "rewards/margins": 0.48758840560913086, | |
| "rewards/rejected": 0.30156660079956055, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.5979543666404405, | |
| "grad_norm": 16.850963592529297, | |
| "learning_rate": 8.480464011183631e-06, | |
| "logits/chosen": -0.2673138678073883, | |
| "logits/rejected": -0.6848293542861938, | |
| "logps/chosen": -201.9542999267578, | |
| "logps/rejected": -168.80638122558594, | |
| "loss": 0.6669, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.6175512671470642, | |
| "rewards/margins": 0.30348506569862366, | |
| "rewards/rejected": 0.31406617164611816, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.6018882769472856, | |
| "grad_norm": 18.8007755279541, | |
| "learning_rate": 8.344780135737962e-06, | |
| "logits/chosen": -0.31253287196159363, | |
| "logits/rejected": -0.8586766123771667, | |
| "logps/chosen": -212.3469696044922, | |
| "logps/rejected": -163.8748321533203, | |
| "loss": 0.6595, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.8451669812202454, | |
| "rewards/margins": 0.5855604410171509, | |
| "rewards/rejected": 0.2596065402030945, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.6058221872541306, | |
| "grad_norm": 13.551706314086914, | |
| "learning_rate": 8.209408865456127e-06, | |
| "logits/chosen": -0.13036459684371948, | |
| "logits/rejected": -0.4954930245876312, | |
| "logps/chosen": -213.2278289794922, | |
| "logps/rejected": -188.24514770507812, | |
| "loss": 0.662, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.8455514907836914, | |
| "rewards/margins": 0.34862059354782104, | |
| "rewards/rejected": 0.49693092703819275, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.6097560975609756, | |
| "grad_norm": 17.73063087463379, | |
| "learning_rate": 8.074375766584053e-06, | |
| "logits/chosen": 0.0039010108448565006, | |
| "logits/rejected": -0.5214850306510925, | |
| "logps/chosen": -213.3166046142578, | |
| "logps/rejected": -174.0699005126953, | |
| "loss": 0.717, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.6939308643341064, | |
| "rewards/margins": 0.32082659006118774, | |
| "rewards/rejected": 0.3731042742729187, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.6136900078678206, | |
| "grad_norm": 14.620991706848145, | |
| "learning_rate": 7.939706341500555e-06, | |
| "logits/chosen": -0.04872986674308777, | |
| "logits/rejected": -0.4084659516811371, | |
| "logps/chosen": -194.51834106445312, | |
| "logps/rejected": -185.00225830078125, | |
| "loss": 0.5966, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.8033088445663452, | |
| "rewards/margins": 0.5693622827529907, | |
| "rewards/rejected": 0.23394668102264404, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.6176239181746657, | |
| "grad_norm": 13.0098876953125, | |
| "learning_rate": 7.805426023900938e-06, | |
| "logits/chosen": -0.4255433976650238, | |
| "logits/rejected": -0.7939322590827942, | |
| "logps/chosen": -190.10177612304688, | |
| "logps/rejected": -162.91436767578125, | |
| "loss": 0.7034, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.6892917156219482, | |
| "rewards/margins": 0.3028218150138855, | |
| "rewards/rejected": 0.38646987080574036, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.6215578284815106, | |
| "grad_norm": 22.03873634338379, | |
| "learning_rate": 7.671560173993588e-06, | |
| "logits/chosen": -0.08852169662714005, | |
| "logits/rejected": -0.4719138741493225, | |
| "logps/chosen": -199.76376342773438, | |
| "logps/rejected": -182.2493896484375, | |
| "loss": 0.6744, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.7909868359565735, | |
| "rewards/margins": 0.3397650420665741, | |
| "rewards/rejected": 0.4512217938899994, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.6254917387883556, | |
| "grad_norm": 18.647151947021484, | |
| "learning_rate": 7.538134073710437e-06, | |
| "logits/chosen": -0.38996896147727966, | |
| "logits/rejected": -0.6869844198226929, | |
| "logps/chosen": -198.90866088867188, | |
| "logps/rejected": -178.61019897460938, | |
| "loss": 0.7028, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.7868278622627258, | |
| "rewards/margins": 0.44276612997055054, | |
| "rewards/rejected": 0.3440617322921753, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.6294256490952006, | |
| "grad_norm": 17.837268829345703, | |
| "learning_rate": 7.405172921932214e-06, | |
| "logits/chosen": -0.09680289775133133, | |
| "logits/rejected": -0.4570208191871643, | |
| "logps/chosen": -196.43899536132812, | |
| "logps/rejected": -173.35025024414062, | |
| "loss": 0.6309, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.7571867108345032, | |
| "rewards/margins": 0.43233370780944824, | |
| "rewards/rejected": 0.32485300302505493, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6294256490952006, | |
| "eval_logits/chosen": 1.2894115447998047, | |
| "eval_logits/rejected": 1.0707098245620728, | |
| "eval_logps/chosen": -206.11080932617188, | |
| "eval_logps/rejected": -179.48574829101562, | |
| "eval_loss": 0.6793522834777832, | |
| "eval_rewards/accuracies": 0.6265624761581421, | |
| "eval_rewards/chosen": 0.7437959313392639, | |
| "eval_rewards/margins": 0.384955495595932, | |
| "eval_rewards/rejected": 0.3588404655456543, | |
| "eval_runtime": 298.0621, | |
| "eval_samples_per_second": 2.147, | |
| "eval_steps_per_second": 0.134, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6333595594020456, | |
| "grad_norm": 23.481149673461914, | |
| "learning_rate": 7.272701829729378e-06, | |
| "logits/chosen": -0.09348127245903015, | |
| "logits/rejected": -0.39429792761802673, | |
| "logps/chosen": -222.31369018554688, | |
| "logps/rejected": -189.89024353027344, | |
| "loss": 0.7434, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.7474627494812012, | |
| "rewards/margins": 0.24622318148612976, | |
| "rewards/rejected": 0.5012395977973938, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.6372934697088907, | |
| "grad_norm": 18.71939468383789, | |
| "learning_rate": 7.140745815619632e-06, | |
| "logits/chosen": -0.09522039443254471, | |
| "logits/rejected": -0.4288865923881531, | |
| "logps/chosen": -198.81405639648438, | |
| "logps/rejected": -192.83120727539062, | |
| "loss": 0.6662, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.610317587852478, | |
| "rewards/margins": 0.3116861879825592, | |
| "rewards/rejected": 0.2986314296722412, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.6412273800157356, | |
| "grad_norm": 18.34478759765625, | |
| "learning_rate": 7.009329800842929e-06, | |
| "logits/chosen": 0.017814218997955322, | |
| "logits/rejected": -0.3244866132736206, | |
| "logps/chosen": -229.75381469726562, | |
| "logps/rejected": -199.60000610351562, | |
| "loss": 0.7092, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": 0.5720285177230835, | |
| "rewards/margins": 0.19818969070911407, | |
| "rewards/rejected": 0.3738388121128082, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.6451612903225806, | |
| "grad_norm": 16.03777313232422, | |
| "learning_rate": 6.878478604654835e-06, | |
| "logits/chosen": -0.284344345331192, | |
| "logits/rejected": -0.6540359258651733, | |
| "logps/chosen": -195.71812438964844, | |
| "logps/rejected": -176.70550537109375, | |
| "loss": 0.5904, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.6928594708442688, | |
| "rewards/margins": 0.6011512875556946, | |
| "rewards/rejected": 0.09170810133218765, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.6490952006294256, | |
| "grad_norm": 22.05975914001465, | |
| "learning_rate": 6.748216939639158e-06, | |
| "logits/chosen": 0.07760115712881088, | |
| "logits/rejected": -0.4913705885410309, | |
| "logps/chosen": -190.44102478027344, | |
| "logps/rejected": -163.40457153320312, | |
| "loss": 0.6636, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.5673459768295288, | |
| "rewards/margins": 0.46832141280174255, | |
| "rewards/rejected": 0.09902457147836685, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.6530291109362707, | |
| "grad_norm": 19.04427146911621, | |
| "learning_rate": 6.618569407040736e-06, | |
| "logits/chosen": -0.2564006745815277, | |
| "logits/rejected": -0.621497392654419, | |
| "logps/chosen": -198.78524780273438, | |
| "logps/rejected": -172.7997283935547, | |
| "loss": 0.6624, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.6063997149467468, | |
| "rewards/margins": 0.4061097204685211, | |
| "rewards/rejected": 0.2002900391817093, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.6569630212431157, | |
| "grad_norm": 13.502724647521973, | |
| "learning_rate": 6.489560492119225e-06, | |
| "logits/chosen": 0.06354556977748871, | |
| "logits/rejected": -0.4314854145050049, | |
| "logps/chosen": -215.6816864013672, | |
| "logps/rejected": -183.03579711914062, | |
| "loss": 0.6743, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.43139171600341797, | |
| "rewards/margins": 0.3207935392856598, | |
| "rewards/rejected": 0.11059819161891937, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.6608969315499607, | |
| "grad_norm": 15.181354522705078, | |
| "learning_rate": 6.361214559524817e-06, | |
| "logits/chosen": -0.3440548777580261, | |
| "logits/rejected": -0.6467902660369873, | |
| "logps/chosen": -194.0684814453125, | |
| "logps/rejected": -180.21780395507812, | |
| "loss": 0.615, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.5782068967819214, | |
| "rewards/margins": 0.4565269947052002, | |
| "rewards/rejected": 0.12167992442846298, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.6648308418568056, | |
| "grad_norm": 79.10075378417969, | |
| "learning_rate": 6.233555848696724e-06, | |
| "logits/chosen": -0.293182373046875, | |
| "logits/rejected": -0.5915425419807434, | |
| "logps/chosen": -208.3809356689453, | |
| "logps/rejected": -191.13064575195312, | |
| "loss": 0.7247, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.5359665155410767, | |
| "rewards/margins": 0.28759217262268066, | |
| "rewards/rejected": 0.24837426841259003, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.6687647521636507, | |
| "grad_norm": 18.02682113647461, | |
| "learning_rate": 6.1066084692853224e-06, | |
| "logits/chosen": -0.03417937830090523, | |
| "logits/rejected": -0.43492475152015686, | |
| "logps/chosen": -212.67398071289062, | |
| "logps/rejected": -183.54196166992188, | |
| "loss": 0.6832, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.3686201870441437, | |
| "rewards/margins": 0.2786737084388733, | |
| "rewards/rejected": 0.08994650840759277, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.6726986624704957, | |
| "grad_norm": 17.677215576171875, | |
| "learning_rate": 5.980396396598777e-06, | |
| "logits/chosen": -0.2180563509464264, | |
| "logits/rejected": -0.3799629211425781, | |
| "logps/chosen": -192.2188720703125, | |
| "logps/rejected": -187.93289184570312, | |
| "loss": 0.6909, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.4506203234195709, | |
| "rewards/margins": 0.31998997926712036, | |
| "rewards/rejected": 0.13063031435012817, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.6766325727773407, | |
| "grad_norm": 13.698114395141602, | |
| "learning_rate": 5.854943467075087e-06, | |
| "logits/chosen": -0.22957925498485565, | |
| "logits/rejected": -0.5203697085380554, | |
| "logps/chosen": -198.90037536621094, | |
| "logps/rejected": -180.50279235839844, | |
| "loss": 0.6282, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.4433286786079407, | |
| "rewards/margins": 0.4702211916446686, | |
| "rewards/rejected": -0.026892513036727905, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.6805664830841857, | |
| "grad_norm": 16.75077247619629, | |
| "learning_rate": 5.730273373780309e-06, | |
| "logits/chosen": -0.3643267750740051, | |
| "logits/rejected": -0.7527881860733032, | |
| "logps/chosen": -193.90756225585938, | |
| "logps/rejected": -173.71755981445312, | |
| "loss": 0.6902, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.46958428621292114, | |
| "rewards/margins": 0.43391847610473633, | |
| "rewards/rejected": 0.03566574305295944, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.6845003933910306, | |
| "grad_norm": 21.622961044311523, | |
| "learning_rate": 5.606409661933889e-06, | |
| "logits/chosen": -0.023716717958450317, | |
| "logits/rejected": -0.3822089731693268, | |
| "logps/chosen": -221.4508056640625, | |
| "logps/rejected": -188.75930786132812, | |
| "loss": 0.7406, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.6389329433441162, | |
| "rewards/margins": 0.3061096668243408, | |
| "rewards/rejected": 0.332823246717453, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.6884343036978757, | |
| "grad_norm": 19.141998291015625, | |
| "learning_rate": 5.483375724461918e-06, | |
| "logits/chosen": -0.36916786432266235, | |
| "logits/rejected": -0.8393670320510864, | |
| "logps/chosen": -201.64920043945312, | |
| "logps/rejected": -163.6253662109375, | |
| "loss": 0.6788, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.36916905641555786, | |
| "rewards/margins": 0.3855450749397278, | |
| "rewards/rejected": -0.016376061365008354, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.6923682140047207, | |
| "grad_norm": 20.840383529663086, | |
| "learning_rate": 5.361194797579108e-06, | |
| "logits/chosen": -0.27600985765457153, | |
| "logits/rejected": -0.7273412346839905, | |
| "logps/chosen": -213.323486328125, | |
| "logps/rejected": -172.2437286376953, | |
| "loss": 0.7035, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.7029854655265808, | |
| "rewards/margins": 0.3768990635871887, | |
| "rewards/rejected": 0.3260864317417145, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.6963021243115657, | |
| "grad_norm": 20.24435806274414, | |
| "learning_rate": 5.239889956400435e-06, | |
| "logits/chosen": 0.13340488076210022, | |
| "logits/rejected": -0.46101540327072144, | |
| "logps/chosen": -217.2809295654297, | |
| "logps/rejected": -176.47802734375, | |
| "loss": 0.6408, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.5957245230674744, | |
| "rewards/margins": 0.416795551776886, | |
| "rewards/rejected": 0.17892900109291077, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.7002360346184107, | |
| "grad_norm": 18.37978172302246, | |
| "learning_rate": 5.119484110583135e-06, | |
| "logits/chosen": -0.4709344506263733, | |
| "logits/rejected": -0.7668399810791016, | |
| "logps/chosen": -200.41390991210938, | |
| "logps/rejected": -169.01779174804688, | |
| "loss": 0.6936, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.39610370993614197, | |
| "rewards/margins": 0.3159303665161133, | |
| "rewards/rejected": 0.08017335832118988, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.7041699449252558, | |
| "grad_norm": 14.384517669677734, | |
| "learning_rate": 5.000000000000003e-06, | |
| "logits/chosen": -0.2237352579832077, | |
| "logits/rejected": -0.7978562116622925, | |
| "logps/chosen": -200.4236297607422, | |
| "logps/rejected": -168.09664916992188, | |
| "loss": 0.6054, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.5839098691940308, | |
| "rewards/margins": 0.5763157606124878, | |
| "rewards/rejected": 0.007594155613332987, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.7081038552321007, | |
| "grad_norm": 23.844955444335938, | |
| "learning_rate": 4.881460190444726e-06, | |
| "logits/chosen": -0.57319176197052, | |
| "logits/rejected": -0.7391110062599182, | |
| "logps/chosen": -205.91015625, | |
| "logps/rejected": -186.86459350585938, | |
| "loss": 0.6875, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.5059628486633301, | |
| "rewards/margins": 0.3120475113391876, | |
| "rewards/rejected": 0.19391539692878723, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7081038552321007, | |
| "eval_logits/chosen": 1.2854810953140259, | |
| "eval_logits/rejected": 1.0660665035247803, | |
| "eval_logps/chosen": -206.6718292236328, | |
| "eval_logps/rejected": -179.9932861328125, | |
| "eval_loss": 0.6678879857063293, | |
| "eval_rewards/accuracies": 0.6265624761581421, | |
| "eval_rewards/chosen": 0.519389808177948, | |
| "eval_rewards/margins": 0.3635701537132263, | |
| "eval_rewards/rejected": 0.15581969916820526, | |
| "eval_runtime": 296.5851, | |
| "eval_samples_per_second": 2.158, | |
| "eval_steps_per_second": 0.135, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7120377655389457, | |
| "grad_norm": 14.835896492004395, | |
| "learning_rate": 4.763887069370107e-06, | |
| "logits/chosen": -0.1812276542186737, | |
| "logits/rejected": -0.5340962409973145, | |
| "logps/chosen": -184.94485473632812, | |
| "logps/rejected": -169.592041015625, | |
| "loss": 0.6794, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.4734250605106354, | |
| "rewards/margins": 0.3993573486804962, | |
| "rewards/rejected": 0.07406774908304214, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.7159716758457907, | |
| "grad_norm": 24.3856143951416, | |
| "learning_rate": 4.64730284165996e-06, | |
| "logits/chosen": -0.04929916188120842, | |
| "logits/rejected": -0.5009157061576843, | |
| "logps/chosen": -225.0531768798828, | |
| "logps/rejected": -193.8749237060547, | |
| "loss": 0.6584, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.7070298194885254, | |
| "rewards/margins": 0.4039214551448822, | |
| "rewards/rejected": 0.3031083941459656, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.7199055861526357, | |
| "grad_norm": 22.8303279876709, | |
| "learning_rate": 4.531729525435501e-06, | |
| "logits/chosen": 0.0025139451026916504, | |
| "logits/rejected": -0.6012422442436218, | |
| "logps/chosen": -205.25, | |
| "logps/rejected": -166.71438598632812, | |
| "loss": 0.639, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.6045829653739929, | |
| "rewards/margins": 0.42883044481277466, | |
| "rewards/rejected": 0.17575259506702423, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.7238394964594808, | |
| "grad_norm": 14.778836250305176, | |
| "learning_rate": 4.417188947896983e-06, | |
| "logits/chosen": -0.30647343397140503, | |
| "logits/rejected": -0.6068025827407837, | |
| "logps/chosen": -185.31884765625, | |
| "logps/rejected": -171.61390686035156, | |
| "loss": 0.6358, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 0.5817626118659973, | |
| "rewards/margins": 0.4069378972053528, | |
| "rewards/rejected": 0.17482469975948334, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.7277734067663257, | |
| "grad_norm": 14.139073371887207, | |
| "learning_rate": 4.303702741201431e-06, | |
| "logits/chosen": -0.5711551904678345, | |
| "logits/rejected": -0.8691667318344116, | |
| "logps/chosen": -192.8331298828125, | |
| "logps/rejected": -175.0562286376953, | |
| "loss": 0.6808, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.38311484456062317, | |
| "rewards/margins": 0.3181079924106598, | |
| "rewards/rejected": 0.0650068148970604, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.7317073170731707, | |
| "grad_norm": 15.895308494567871, | |
| "learning_rate": 4.1912923383771685e-06, | |
| "logits/chosen": -0.36842986941337585, | |
| "logits/rejected": -0.7152490019798279, | |
| "logps/chosen": -211.0810089111328, | |
| "logps/rejected": -196.27755737304688, | |
| "loss": 0.6735, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.5359424948692322, | |
| "rewards/margins": 0.3752870559692383, | |
| "rewards/rejected": 0.16065548360347748, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.7356412273800157, | |
| "grad_norm": 14.658058166503906, | |
| "learning_rate": 4.079978969275984e-06, | |
| "logits/chosen": -0.5706170797348022, | |
| "logits/rejected": -0.852310299873352, | |
| "logps/chosen": -176.20578002929688, | |
| "logps/rejected": -158.9827423095703, | |
| "loss": 0.7049, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.29048237204551697, | |
| "rewards/margins": 0.3135663866996765, | |
| "rewards/rejected": -0.023084009066224098, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.7395751376868608, | |
| "grad_norm": 14.667938232421875, | |
| "learning_rate": 3.9697836565636484e-06, | |
| "logits/chosen": -0.0873163565993309, | |
| "logits/rejected": -0.4978067874908447, | |
| "logps/chosen": -219.19210815429688, | |
| "logps/rejected": -186.28640747070312, | |
| "loss": 0.6177, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.48243242502212524, | |
| "rewards/margins": 0.47774791717529297, | |
| "rewards/rejected": 0.004684485495090485, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.7435090479937058, | |
| "grad_norm": 20.62685775756836, | |
| "learning_rate": 3.860727211749572e-06, | |
| "logits/chosen": -0.3459232449531555, | |
| "logits/rejected": -0.6185725927352905, | |
| "logps/chosen": -204.01295471191406, | |
| "logps/rejected": -179.14883422851562, | |
| "loss": 0.6996, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.7561392188072205, | |
| "rewards/margins": 0.43435636162757874, | |
| "rewards/rejected": 0.3217828571796417, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.7474429583005507, | |
| "grad_norm": 17.138633728027344, | |
| "learning_rate": 3.7528302312563447e-06, | |
| "logits/chosen": -0.21280460059642792, | |
| "logits/rejected": -0.6648741960525513, | |
| "logps/chosen": -207.45266723632812, | |
| "logps/rejected": -169.14617919921875, | |
| "loss": 0.7068, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.38267606496810913, | |
| "rewards/margins": 0.17945952713489532, | |
| "rewards/rejected": 0.2032165229320526, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.7513768686073957, | |
| "grad_norm": 58.220947265625, | |
| "learning_rate": 3.646113092529878e-06, | |
| "logits/chosen": -0.21766535937786102, | |
| "logits/rejected": -0.6996904611587524, | |
| "logps/chosen": -225.0487060546875, | |
| "logps/rejected": -184.19442749023438, | |
| "loss": 0.7056, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.6735895872116089, | |
| "rewards/margins": 0.4389261305332184, | |
| "rewards/rejected": 0.2346634566783905, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.7553107789142408, | |
| "grad_norm": 20.379343032836914, | |
| "learning_rate": 3.5405959501909313e-06, | |
| "logits/chosen": -0.18848784267902374, | |
| "logits/rejected": -0.5305780172348022, | |
| "logps/chosen": -212.13162231445312, | |
| "logps/rejected": -186.52542114257812, | |
| "loss": 0.6872, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.3702337145805359, | |
| "rewards/margins": 0.29016590118408203, | |
| "rewards/rejected": 0.08006780594587326, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.7592446892210858, | |
| "grad_norm": 17.178056716918945, | |
| "learning_rate": 3.436298732228699e-06, | |
| "logits/chosen": -0.21896116435527802, | |
| "logits/rejected": -0.6624099612236023, | |
| "logps/chosen": -205.2207794189453, | |
| "logps/rejected": -170.05699157714844, | |
| "loss": 0.6446, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.4240780472755432, | |
| "rewards/margins": 0.37573105096817017, | |
| "rewards/rejected": 0.04834695905447006, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.7631785995279308, | |
| "grad_norm": 19.06415557861328, | |
| "learning_rate": 3.3332411362372063e-06, | |
| "logits/chosen": -0.15206289291381836, | |
| "logits/rejected": -0.4406839907169342, | |
| "logps/chosen": -186.83627319335938, | |
| "logps/rejected": -164.04739379882812, | |
| "loss": 0.6972, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.5690515041351318, | |
| "rewards/margins": 0.338064044713974, | |
| "rewards/rejected": 0.23098750412464142, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.7671125098347757, | |
| "grad_norm": 19.997249603271484, | |
| "learning_rate": 3.231442625695217e-06, | |
| "logits/chosen": -0.4492325186729431, | |
| "logits/rejected": -0.6821542978286743, | |
| "logps/chosen": -192.6551971435547, | |
| "logps/rejected": -174.02772521972656, | |
| "loss": 0.6523, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 0.41465169191360474, | |
| "rewards/margins": 0.4233173727989197, | |
| "rewards/rejected": -0.008665725588798523, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.7710464201416207, | |
| "grad_norm": 21.12126350402832, | |
| "learning_rate": 3.1309224262903614e-06, | |
| "logits/chosen": -0.0248140636831522, | |
| "logits/rejected": -0.2627066373825073, | |
| "logps/chosen": -214.6104278564453, | |
| "logps/rejected": -192.9540557861328, | |
| "loss": 0.6733, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.5803993940353394, | |
| "rewards/margins": 0.2992710471153259, | |
| "rewards/rejected": 0.28112831711769104, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.7749803304484658, | |
| "grad_norm": 12.457499504089355, | |
| "learning_rate": 3.0316995222881584e-06, | |
| "logits/chosen": -0.40065187215805054, | |
| "logits/rejected": -0.8357529640197754, | |
| "logps/chosen": -192.20655822753906, | |
| "logps/rejected": -164.68626403808594, | |
| "loss": 0.6292, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.5738979578018188, | |
| "rewards/margins": 0.4073941111564636, | |
| "rewards/rejected": 0.1665038764476776, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.7789142407553108, | |
| "grad_norm": 12.965932846069336, | |
| "learning_rate": 2.9337926529466578e-06, | |
| "logits/chosen": -0.5754062533378601, | |
| "logits/rejected": -0.9457462430000305, | |
| "logps/chosen": -189.44522094726562, | |
| "logps/rejected": -169.0963897705078, | |
| "loss": 0.6242, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 0.49839162826538086, | |
| "rewards/margins": 0.4758077561855316, | |
| "rewards/rejected": 0.02258378639817238, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.7828481510621558, | |
| "grad_norm": 14.567062377929688, | |
| "learning_rate": 2.83722030897733e-06, | |
| "logits/chosen": 0.24449042975902557, | |
| "logits/rejected": -0.30078762769699097, | |
| "logps/chosen": -205.9731903076172, | |
| "logps/rejected": -173.31008911132812, | |
| "loss": 0.5947, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 0.5201369524002075, | |
| "rewards/margins": 0.5564968585968018, | |
| "rewards/rejected": -0.036359887570142746, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.7867820613690008, | |
| "grad_norm": 18.595260620117188, | |
| "learning_rate": 2.7420007290529118e-06, | |
| "logits/chosen": -0.1308153122663498, | |
| "logits/rejected": -0.6352800726890564, | |
| "logps/chosen": -224.5437469482422, | |
| "logps/rejected": -178.47549438476562, | |
| "loss": 0.6361, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.6366292238235474, | |
| "rewards/margins": 0.4467080235481262, | |
| "rewards/rejected": 0.18992114067077637, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7867820613690008, | |
| "eval_logits/chosen": 1.278507113456726, | |
| "eval_logits/rejected": 1.058009147644043, | |
| "eval_logps/chosen": -206.54354858398438, | |
| "eval_logps/rejected": -179.86978149414062, | |
| "eval_loss": 0.6649525165557861, | |
| "eval_rewards/accuracies": 0.625, | |
| "eval_rewards/chosen": 0.5706965923309326, | |
| "eval_rewards/margins": 0.3654647767543793, | |
| "eval_rewards/rejected": 0.20523183047771454, | |
| "eval_runtime": 301.4428, | |
| "eval_samples_per_second": 2.123, | |
| "eval_steps_per_second": 0.133, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7907159716758458, | |
| "grad_norm": 19.2440242767334, | |
| "learning_rate": 2.6481518963628383e-06, | |
| "logits/chosen": -0.11340751498937607, | |
| "logits/rejected": -0.31099405884742737, | |
| "logps/chosen": -212.424072265625, | |
| "logps/rejected": -195.0722198486328, | |
| "loss": 0.6193, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.5051929354667664, | |
| "rewards/margins": 0.5108169317245483, | |
| "rewards/rejected": -0.00562392920255661, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.7946498819826908, | |
| "grad_norm": 17.27981185913086, | |
| "learning_rate": 2.555691535216944e-06, | |
| "logits/chosen": -0.2921395003795624, | |
| "logits/rejected": -0.7080395817756653, | |
| "logps/chosen": -208.31747436523438, | |
| "logps/rejected": -180.02212524414062, | |
| "loss": 0.676, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.4920189380645752, | |
| "rewards/margins": 0.2944754660129547, | |
| "rewards/rejected": 0.19754347205162048, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.7985837922895358, | |
| "grad_norm": 12.276522636413574, | |
| "learning_rate": 2.464637107698046e-06, | |
| "logits/chosen": -0.3768986165523529, | |
| "logits/rejected": -0.9090649485588074, | |
| "logps/chosen": -195.61764526367188, | |
| "logps/rejected": -158.5428466796875, | |
| "loss": 0.676, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.35970592498779297, | |
| "rewards/margins": 0.34655410051345825, | |
| "rewards/rejected": 0.013151821680366993, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.8025177025963808, | |
| "grad_norm": 12.748953819274902, | |
| "learning_rate": 2.3750058103640427e-06, | |
| "logits/chosen": -0.3452379107475281, | |
| "logits/rejected": -0.8985518217086792, | |
| "logps/chosen": -209.6136016845703, | |
| "logps/rejected": -173.26414489746094, | |
| "loss": 0.6122, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.5482198596000671, | |
| "rewards/margins": 0.4972603917121887, | |
| "rewards/rejected": 0.05095947906374931, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.8064516129032258, | |
| "grad_norm": 15.02308177947998, | |
| "learning_rate": 2.286814571000171e-06, | |
| "logits/chosen": -0.2370177060365677, | |
| "logits/rejected": -0.6736031770706177, | |
| "logps/chosen": -194.3092041015625, | |
| "logps/rejected": -164.15817260742188, | |
| "loss": 0.6226, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": 0.5286887288093567, | |
| "rewards/margins": 0.4581621289253235, | |
| "rewards/rejected": 0.0705266147851944, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.8103855232100708, | |
| "grad_norm": 15.919551849365234, | |
| "learning_rate": 2.2000800454220285e-06, | |
| "logits/chosen": -0.04363623261451721, | |
| "logits/rejected": -0.4236673414707184, | |
| "logps/chosen": -209.69235229492188, | |
| "logps/rejected": -175.1033935546875, | |
| "loss": 0.6664, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.5712024569511414, | |
| "rewards/margins": 0.4244155287742615, | |
| "rewards/rejected": 0.1467868834733963, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.8143194335169158, | |
| "grad_norm": 16.688159942626953, | |
| "learning_rate": 2.114818614329945e-06, | |
| "logits/chosen": -0.18427999317646027, | |
| "logits/rejected": -0.4734131693840027, | |
| "logps/chosen": -200.3739776611328, | |
| "logps/rejected": -180.11984252929688, | |
| "loss": 0.6447, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.6805271506309509, | |
| "rewards/margins": 0.4109037518501282, | |
| "rewards/rejected": 0.26962336897850037, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.8182533438237608, | |
| "grad_norm": 15.36899471282959, | |
| "learning_rate": 2.031046380215327e-06, | |
| "logits/chosen": -0.5546427965164185, | |
| "logits/rejected": -0.8263591527938843, | |
| "logps/chosen": -180.88345336914062, | |
| "logps/rejected": -167.74163818359375, | |
| "loss": 0.6247, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.5958175659179688, | |
| "rewards/margins": 0.5058714151382446, | |
| "rewards/rejected": 0.08994609862565994, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.8221872541306058, | |
| "grad_norm": 24.130155563354492, | |
| "learning_rate": 1.9487791643195276e-06, | |
| "logits/chosen": -0.3917720317840576, | |
| "logits/rejected": -0.7242711782455444, | |
| "logps/chosen": -206.98135375976562, | |
| "logps/rejected": -183.48318481445312, | |
| "loss": 0.7598, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.24247002601623535, | |
| "rewards/margins": 0.1793862134218216, | |
| "rewards/rejected": 0.06308381259441376, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.8261211644374509, | |
| "grad_norm": 16.544754028320312, | |
| "learning_rate": 1.8680325036458535e-06, | |
| "logits/chosen": -0.16317354142665863, | |
| "logits/rejected": -0.5910676717758179, | |
| "logps/chosen": -204.1961669921875, | |
| "logps/rejected": -173.1997833251953, | |
| "loss": 0.706, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.40617918968200684, | |
| "rewards/margins": 0.3612835705280304, | |
| "rewards/rejected": 0.04489566385746002, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.8300550747442959, | |
| "grad_norm": 28.87963104248047, | |
| "learning_rate": 1.788821648025242e-06, | |
| "logits/chosen": -0.46491608023643494, | |
| "logits/rejected": -0.5262236595153809, | |
| "logps/chosen": -198.90652465820312, | |
| "logps/rejected": -188.23049926757812, | |
| "loss": 0.7507, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.4018153250217438, | |
| "rewards/margins": 0.1714794784784317, | |
| "rewards/rejected": 0.23033586144447327, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.8339889850511408, | |
| "grad_norm": 21.89056968688965, | |
| "learning_rate": 1.7111615572361628e-06, | |
| "logits/chosen": -0.1197819709777832, | |
| "logits/rejected": -0.40464717149734497, | |
| "logps/chosen": -211.6194305419922, | |
| "logps/rejected": -192.4689483642578, | |
| "loss": 0.7031, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.44010597467422485, | |
| "rewards/margins": 0.23361381888389587, | |
| "rewards/rejected": 0.20649214088916779, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.8379228953579858, | |
| "grad_norm": 25.025197982788086, | |
| "learning_rate": 1.6350668981793304e-06, | |
| "logits/chosen": -0.21810774505138397, | |
| "logits/rejected": -0.536165714263916, | |
| "logps/chosen": -195.72702026367188, | |
| "logps/rejected": -185.3990478515625, | |
| "loss": 0.6838, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.22649447619915009, | |
| "rewards/margins": 0.3016238212585449, | |
| "rewards/rejected": -0.07512933015823364, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.8418568056648308, | |
| "grad_norm": 15.117574691772461, | |
| "learning_rate": 1.5605520421076969e-06, | |
| "logits/chosen": -0.34034574031829834, | |
| "logits/rejected": -0.5113102793693542, | |
| "logps/chosen": -195.9296417236328, | |
| "logps/rejected": -186.29287719726562, | |
| "loss": 0.6485, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.23202356696128845, | |
| "rewards/margins": 0.3296849727630615, | |
| "rewards/rejected": -0.09766140580177307, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.8457907159716759, | |
| "grad_norm": 16.99416732788086, | |
| "learning_rate": 1.487631061912298e-06, | |
| "logits/chosen": -0.5572665929794312, | |
| "logits/rejected": -0.8171085119247437, | |
| "logps/chosen": -193.6608123779297, | |
| "logps/rejected": -176.0238800048828, | |
| "loss": 0.6605, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.35814136266708374, | |
| "rewards/margins": 0.37609419226646423, | |
| "rewards/rejected": -0.017952853813767433, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.8497246262785209, | |
| "grad_norm": 14.536643981933594, | |
| "learning_rate": 1.4163177294644438e-06, | |
| "logits/chosen": -0.2895492613315582, | |
| "logits/rejected": -0.48721733689308167, | |
| "logps/chosen": -198.87753295898438, | |
| "logps/rejected": -183.21096801757812, | |
| "loss": 0.6838, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.22944995760917664, | |
| "rewards/margins": 0.3464585840702057, | |
| "rewards/rejected": -0.11700858920812607, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.8536585365853658, | |
| "grad_norm": 25.793216705322266, | |
| "learning_rate": 1.3466255130147622e-06, | |
| "logits/chosen": -0.36471131443977356, | |
| "logits/rejected": -0.5930619239807129, | |
| "logps/chosen": -187.9856719970703, | |
| "logps/rejected": -175.9360809326172, | |
| "loss": 0.683, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.1035921722650528, | |
| "rewards/margins": 0.23180215060710907, | |
| "rewards/rejected": -0.12820999324321747, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.8575924468922108, | |
| "grad_norm": 20.578927993774414, | |
| "learning_rate": 1.2785675746495752e-06, | |
| "logits/chosen": -0.24610432982444763, | |
| "logits/rejected": -0.7905102968215942, | |
| "logps/chosen": -188.7552032470703, | |
| "logps/rejected": -163.68289184570312, | |
| "loss": 0.6594, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.19958016276359558, | |
| "rewards/margins": 0.35812973976135254, | |
| "rewards/rejected": -0.15854960680007935, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.8615263571990559, | |
| "grad_norm": 17.24201011657715, | |
| "learning_rate": 1.212156767805115e-06, | |
| "logits/chosen": -0.3163990080356598, | |
| "logits/rejected": -0.8110219240188599, | |
| "logps/chosen": -175.55859375, | |
| "logps/rejected": -141.836181640625, | |
| "loss": 0.6571, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.13926038146018982, | |
| "rewards/margins": 0.2834976315498352, | |
| "rewards/rejected": -0.1442372053861618, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.8654602675059009, | |
| "grad_norm": 13.562137603759766, | |
| "learning_rate": 1.1474056348400141e-06, | |
| "logits/chosen": -0.25132131576538086, | |
| "logits/rejected": -0.5677313804626465, | |
| "logps/chosen": -192.50961303710938, | |
| "logps/rejected": -171.54611206054688, | |
| "loss": 0.6721, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.3157256245613098, | |
| "rewards/margins": 0.3246195316314697, | |
| "rewards/rejected": -0.008893907070159912, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8654602675059009, | |
| "eval_logits/chosen": 1.2642682790756226, | |
| "eval_logits/rejected": 1.043653964996338, | |
| "eval_logps/chosen": -207.01547241210938, | |
| "eval_logps/rejected": -180.30709838867188, | |
| "eval_loss": 0.6631070971488953, | |
| "eval_rewards/accuracies": 0.6265624761581421, | |
| "eval_rewards/chosen": 0.38192370533943176, | |
| "eval_rewards/margins": 0.3516288101673126, | |
| "eval_rewards/rejected": 0.03029490076005459, | |
| "eval_runtime": 300.501, | |
| "eval_samples_per_second": 2.13, | |
| "eval_steps_per_second": 0.133, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8693941778127459, | |
| "grad_norm": 21.122217178344727, | |
| "learning_rate": 1.0843264046665558e-06, | |
| "logits/chosen": -0.5116424560546875, | |
| "logits/rejected": -0.6911696195602417, | |
| "logps/chosen": -184.15603637695312, | |
| "logps/rejected": -175.35256958007812, | |
| "loss": 0.757, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.15398895740509033, | |
| "rewards/margins": 0.07341472804546356, | |
| "rewards/rejected": 0.08057420700788498, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.8733280881195908, | |
| "grad_norm": 12.891325950622559, | |
| "learning_rate": 1.0229309904411178e-06, | |
| "logits/chosen": -0.5018507838249207, | |
| "logits/rejected": -0.8595576286315918, | |
| "logps/chosen": -195.92578125, | |
| "logps/rejected": -172.9355010986328, | |
| "loss": 0.6866, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.32908183336257935, | |
| "rewards/margins": 0.38747507333755493, | |
| "rewards/rejected": -0.05839322879910469, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.8772619984264359, | |
| "grad_norm": 17.477975845336914, | |
| "learning_rate": 9.63230987314251e-07, | |
| "logits/chosen": -0.27941471338272095, | |
| "logits/rejected": -0.5305674076080322, | |
| "logps/chosen": -191.43380737304688, | |
| "logps/rejected": -170.3942108154297, | |
| "loss": 0.6999, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.39622825384140015, | |
| "rewards/margins": 0.23104743659496307, | |
| "rewards/rejected": 0.16518081724643707, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.8811959087332809, | |
| "grad_norm": 19.63365936279297, | |
| "learning_rate": 9.052376702408206e-07, | |
| "logits/chosen": -0.4624987542629242, | |
| "logits/rejected": -0.5762002468109131, | |
| "logps/chosen": -187.87295532226562, | |
| "logps/rejected": -193.58670043945312, | |
| "loss": 0.7027, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.4038239121437073, | |
| "rewards/margins": 0.24877241253852844, | |
| "rewards/rejected": 0.15505146980285645, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.8851298190401259, | |
| "grad_norm": 20.428455352783203, | |
| "learning_rate": 8.489619918506098e-07, | |
| "logits/chosen": -0.23860251903533936, | |
| "logits/rejected": -0.6500253677368164, | |
| "logps/chosen": -212.96658325195312, | |
| "logps/rejected": -179.9956512451172, | |
| "loss": 0.7073, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.24726350605487823, | |
| "rewards/margins": 0.2762225866317749, | |
| "rewards/rejected": -0.028959061950445175, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.8890637293469709, | |
| "grad_norm": 19.340242385864258, | |
| "learning_rate": 7.944145803798064e-07, | |
| "logits/chosen": -0.23527947068214417, | |
| "logits/rejected": -0.59322589635849, | |
| "logps/chosen": -203.28225708007812, | |
| "logps/rejected": -180.2418670654297, | |
| "loss": 0.681, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.3411061465740204, | |
| "rewards/margins": 0.18520446121692657, | |
| "rewards/rejected": 0.155901700258255, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.8929976396538158, | |
| "grad_norm": 79.16990661621094, | |
| "learning_rate": 7.416057376637543e-07, | |
| "logits/chosen": -0.3579210638999939, | |
| "logits/rejected": -0.6960107088088989, | |
| "logps/chosen": -200.02012634277344, | |
| "logps/rejected": -180.67965698242188, | |
| "loss": 0.6985, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.4765182435512543, | |
| "rewards/margins": 0.23227711021900177, | |
| "rewards/rejected": 0.2442411184310913, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.8969315499606609, | |
| "grad_norm": 17.410009384155273, | |
| "learning_rate": 6.905454371913467e-07, | |
| "logits/chosen": -0.1638367921113968, | |
| "logits/rejected": -0.5099595189094543, | |
| "logps/chosen": -195.05340576171875, | |
| "logps/rejected": -173.5426788330078, | |
| "loss": 0.586, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.4057907164096832, | |
| "rewards/margins": 0.6160932183265686, | |
| "rewards/rejected": -0.21030254662036896, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.9008654602675059, | |
| "grad_norm": 22.917627334594727, | |
| "learning_rate": 6.412433222214265e-07, | |
| "logits/chosen": -0.2664688527584076, | |
| "logits/rejected": -0.6332502365112305, | |
| "logps/chosen": -216.44711303710938, | |
| "logps/rejected": -192.39352416992188, | |
| "loss": 0.6699, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.47267699241638184, | |
| "rewards/margins": 0.3270387649536133, | |
| "rewards/rejected": 0.14563825726509094, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.9047993705743509, | |
| "grad_norm": 17.761707305908203, | |
| "learning_rate": 5.937087039615619e-07, | |
| "logits/chosen": 0.004246175289154053, | |
| "logits/rejected": -0.3583109974861145, | |
| "logps/chosen": -208.1468963623047, | |
| "logps/rejected": -186.03244018554688, | |
| "loss": 0.647, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.37165918946266174, | |
| "rewards/margins": 0.42197996377944946, | |
| "rewards/rejected": -0.05032079294323921, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.9087332808811959, | |
| "grad_norm": 26.38233184814453, | |
| "learning_rate": 5.479505598095292e-07, | |
| "logits/chosen": -0.12539446353912354, | |
| "logits/rejected": -0.085462287068367, | |
| "logps/chosen": -205.96804809570312, | |
| "logps/rejected": -210.92672729492188, | |
| "loss": 0.7508, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": 0.34662169218063354, | |
| "rewards/margins": 0.055609725415706635, | |
| "rewards/rejected": 0.2910119593143463, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.912667191188041, | |
| "grad_norm": 25.847694396972656, | |
| "learning_rate": 5.03977531657841e-07, | |
| "logits/chosen": -0.023742878809571266, | |
| "logits/rejected": -0.445591539144516, | |
| "logps/chosen": -206.13525390625, | |
| "logps/rejected": -183.71890258789062, | |
| "loss": 0.6829, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.3747442364692688, | |
| "rewards/margins": 0.33868470788002014, | |
| "rewards/rejected": 0.036059536039829254, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.9166011014948859, | |
| "grad_norm": 14.531253814697266, | |
| "learning_rate": 4.6179792426163107e-07, | |
| "logits/chosen": -0.13202346861362457, | |
| "logits/rejected": -0.539734423160553, | |
| "logps/chosen": -192.2351531982422, | |
| "logps/rejected": -167.59829711914062, | |
| "loss": 0.6574, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.4072590470314026, | |
| "rewards/margins": 0.43480420112609863, | |
| "rewards/rejected": -0.027545183897018433, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.9205350118017309, | |
| "grad_norm": 18.72174835205078, | |
| "learning_rate": 4.214197036702239e-07, | |
| "logits/chosen": 0.10880019515752792, | |
| "logits/rejected": -0.2607296109199524, | |
| "logps/chosen": -215.71939086914062, | |
| "logps/rejected": -196.47320556640625, | |
| "loss": 0.657, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.5132928490638733, | |
| "rewards/margins": 0.36024293303489685, | |
| "rewards/rejected": 0.15304993093013763, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.9244689221085759, | |
| "grad_norm": 16.053632736206055, | |
| "learning_rate": 3.82850495722662e-07, | |
| "logits/chosen": -0.07127988338470459, | |
| "logits/rejected": -0.5435328483581543, | |
| "logps/chosen": -210.20547485351562, | |
| "logps/rejected": -173.3409881591797, | |
| "loss": 0.6586, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.4504272937774658, | |
| "rewards/margins": 0.3692251741886139, | |
| "rewards/rejected": 0.08120210468769073, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.9284028324154209, | |
| "grad_norm": 20.035791397094727, | |
| "learning_rate": 3.4609758460748656e-07, | |
| "logits/chosen": -0.1992299109697342, | |
| "logits/rejected": -0.43638792634010315, | |
| "logps/chosen": -196.8170623779297, | |
| "logps/rejected": -181.31607055664062, | |
| "loss": 0.6511, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.29465168714523315, | |
| "rewards/margins": 0.3416779041290283, | |
| "rewards/rejected": -0.04702623561024666, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.932336742722266, | |
| "grad_norm": 14.104338645935059, | |
| "learning_rate": 3.1116791148704584e-07, | |
| "logits/chosen": -0.5095082521438599, | |
| "logits/rejected": -0.933671772480011, | |
| "logps/chosen": -181.0245819091797, | |
| "logps/rejected": -145.5948944091797, | |
| "loss": 0.6582, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.405670702457428, | |
| "rewards/margins": 0.39661210775375366, | |
| "rewards/rejected": 0.009058552794158459, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.9362706530291109, | |
| "grad_norm": 24.393505096435547, | |
| "learning_rate": 2.78068073186587e-07, | |
| "logits/chosen": -0.07540292292833328, | |
| "logits/rejected": -0.5439732670783997, | |
| "logps/chosen": -220.9651336669922, | |
| "logps/rejected": -198.6578826904297, | |
| "loss": 0.662, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.47583404183387756, | |
| "rewards/margins": 0.4002237915992737, | |
| "rewards/rejected": 0.07561029493808746, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.9402045633359559, | |
| "grad_norm": 22.48759651184082, | |
| "learning_rate": 2.4680432094837394e-07, | |
| "logits/chosen": -0.030767759308218956, | |
| "logits/rejected": -0.40518251061439514, | |
| "logps/chosen": -192.23971557617188, | |
| "logps/rejected": -165.24063110351562, | |
| "loss": 0.6944, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.32550159096717834, | |
| "rewards/margins": 0.2751082479953766, | |
| "rewards/rejected": 0.05039336532354355, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.9441384736428009, | |
| "grad_norm": 16.363256454467773, | |
| "learning_rate": 2.1738255925108253e-07, | |
| "logits/chosen": -0.5227106809616089, | |
| "logits/rejected": -0.7640475034713745, | |
| "logps/chosen": -218.41708374023438, | |
| "logps/rejected": -194.16444396972656, | |
| "loss": 0.668, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.38994377851486206, | |
| "rewards/margins": 0.3536146283149719, | |
| "rewards/rejected": 0.03632917255163193, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.9441384736428009, | |
| "eval_logits/chosen": 1.2622064352035522, | |
| "eval_logits/rejected": 1.0416359901428223, | |
| "eval_logps/chosen": -207.0163116455078, | |
| "eval_logps/rejected": -180.30044555664062, | |
| "eval_loss": 0.6637659072875977, | |
| "eval_rewards/accuracies": 0.6390625238418579, | |
| "eval_rewards/chosen": 0.38159698247909546, | |
| "eval_rewards/margins": 0.34863370656967163, | |
| "eval_rewards/rejected": 0.03296329826116562, | |
| "eval_runtime": 307.2933, | |
| "eval_samples_per_second": 2.083, | |
| "eval_steps_per_second": 0.13, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.948072383949646, | |
| "grad_norm": 15.801830291748047, | |
| "learning_rate": 1.8980834469467523e-07, | |
| "logits/chosen": 0.028558891266584396, | |
| "logits/rejected": -0.36049187183380127, | |
| "logps/chosen": -225.1962127685547, | |
| "logps/rejected": -196.6998748779297, | |
| "loss": 0.7157, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.30342918634414673, | |
| "rewards/margins": 0.18997251987457275, | |
| "rewards/rejected": 0.11345665156841278, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.952006294256491, | |
| "grad_norm": 21.53165054321289, | |
| "learning_rate": 1.6408688495098134e-07, | |
| "logits/chosen": -0.09858529269695282, | |
| "logits/rejected": -0.52873694896698, | |
| "logps/chosen": -208.2776336669922, | |
| "logps/rejected": -179.6067657470703, | |
| "loss": 0.701, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.3496444821357727, | |
| "rewards/margins": 0.24224546551704407, | |
| "rewards/rejected": 0.10739902406930923, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.955940204563336, | |
| "grad_norm": 15.417522430419922, | |
| "learning_rate": 1.402230377801761e-07, | |
| "logits/chosen": -0.12817321717739105, | |
| "logits/rejected": -0.5611924529075623, | |
| "logps/chosen": -223.1984405517578, | |
| "logps/rejected": -191.31808471679688, | |
| "loss": 0.673, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.5014004707336426, | |
| "rewards/margins": 0.3005516231060028, | |
| "rewards/rejected": 0.20084881782531738, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.9598741148701809, | |
| "grad_norm": 21.139495849609375, | |
| "learning_rate": 1.1822131011334003e-07, | |
| "logits/chosen": -0.330310583114624, | |
| "logits/rejected": -0.6778287887573242, | |
| "logps/chosen": -206.1497802734375, | |
| "logps/rejected": -175.0183563232422, | |
| "loss": 0.6634, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.40439096093177795, | |
| "rewards/margins": 0.39146164059638977, | |
| "rewards/rejected": 0.012929338030517101, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.963808025177026, | |
| "grad_norm": 15.441524505615234, | |
| "learning_rate": 9.80858572012866e-08, | |
| "logits/chosen": -0.10460350662469864, | |
| "logits/rejected": -0.46022725105285645, | |
| "logps/chosen": -223.1492156982422, | |
| "logps/rejected": -193.82369995117188, | |
| "loss": 0.6415, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.3919476568698883, | |
| "rewards/margins": 0.37521207332611084, | |
| "rewards/rejected": 0.016735553741455078, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.967741935483871, | |
| "grad_norm": 19.24515724182129, | |
| "learning_rate": 7.982048182978985e-08, | |
| "logits/chosen": -0.3437039256095886, | |
| "logits/rejected": -0.7036724090576172, | |
| "logps/chosen": -210.358642578125, | |
| "logps/rejected": -189.95278930664062, | |
| "loss": 0.676, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.3740697503089905, | |
| "rewards/margins": 0.34888529777526855, | |
| "rewards/rejected": 0.02518446370959282, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.971675845790716, | |
| "grad_norm": 16.690387725830078, | |
| "learning_rate": 6.342863360139672e-08, | |
| "logits/chosen": -0.29954901337623596, | |
| "logits/rejected": -0.7138617634773254, | |
| "logps/chosen": -181.06094360351562, | |
| "logps/rejected": -157.12701416015625, | |
| "loss": 0.6961, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.3025146424770355, | |
| "rewards/margins": 0.25972747802734375, | |
| "rewards/rejected": 0.04278718680143356, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.975609756097561, | |
| "grad_norm": 17.477008819580078, | |
| "learning_rate": 4.8913408283934874e-08, | |
| "logits/chosen": -0.19394654035568237, | |
| "logits/rejected": -0.5592636466026306, | |
| "logps/chosen": -211.7626495361328, | |
| "logps/rejected": -190.55416870117188, | |
| "loss": 0.6955, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.39831337332725525, | |
| "rewards/margins": 0.3408041000366211, | |
| "rewards/rejected": 0.05750928074121475, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.9795436664044059, | |
| "grad_norm": 14.256926536560059, | |
| "learning_rate": 3.627754722584031e-08, | |
| "logits/chosen": -0.15048038959503174, | |
| "logits/rejected": -0.5208483934402466, | |
| "logps/chosen": -223.10110473632812, | |
| "logps/rejected": -190.59140014648438, | |
| "loss": 0.6593, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.49489039182662964, | |
| "rewards/margins": 0.3687785863876343, | |
| "rewards/rejected": 0.12611182034015656, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.983477576711251, | |
| "grad_norm": 29.595378875732422, | |
| "learning_rate": 2.5523436838430503e-08, | |
| "logits/chosen": -0.3160143494606018, | |
| "logits/rejected": -0.6430375576019287, | |
| "logps/chosen": -196.36361694335938, | |
| "logps/rejected": -166.49758911132812, | |
| "loss": 0.6625, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.36816468834877014, | |
| "rewards/margins": 0.36051416397094727, | |
| "rewards/rejected": 0.007650518324226141, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.987411487018096, | |
| "grad_norm": 13.689908027648926, | |
| "learning_rate": 1.665310814520482e-08, | |
| "logits/chosen": -0.6328016519546509, | |
| "logits/rejected": -0.9240643382072449, | |
| "logps/chosen": -188.88470458984375, | |
| "logps/rejected": -166.7686767578125, | |
| "loss": 0.6975, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.09804626554250717, | |
| "rewards/margins": 0.26263147592544556, | |
| "rewards/rejected": -0.1645852029323578, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.991345397324941, | |
| "grad_norm": 17.047653198242188, | |
| "learning_rate": 9.668236398262532e-09, | |
| "logits/chosen": -0.35158300399780273, | |
| "logits/rejected": -0.6125014424324036, | |
| "logps/chosen": -203.73788452148438, | |
| "logps/rejected": -189.255126953125, | |
| "loss": 0.6549, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.1564028114080429, | |
| "rewards/margins": 0.3770085275173187, | |
| "rewards/rejected": -0.22060570120811462, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.995279307631786, | |
| "grad_norm": 21.35641098022461, | |
| "learning_rate": 4.570140761918085e-09, | |
| "logits/chosen": -0.744472861289978, | |
| "logits/rejected": -0.9415663480758667, | |
| "logps/chosen": -186.4073028564453, | |
| "logps/rejected": -176.38418579101562, | |
| "loss": 0.6604, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.32688194513320923, | |
| "rewards/margins": 0.42695555090904236, | |
| "rewards/rejected": -0.10007365047931671, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.999213217938631, | |
| "grad_norm": 14.891934394836426, | |
| "learning_rate": 1.3597840635615201e-09, | |
| "logits/chosen": -0.14978916943073273, | |
| "logits/rejected": -0.6677058935165405, | |
| "logps/chosen": -209.85635375976562, | |
| "logps/rejected": -172.8124237060547, | |
| "loss": 0.6708, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.3781249225139618, | |
| "rewards/margins": 0.32455307245254517, | |
| "rewards/rejected": 0.05357181280851364, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 1271, | |
| "total_flos": 0.0, | |
| "train_loss": 0.675776368140424, | |
| "train_runtime": 24039.6181, | |
| "train_samples_per_second": 0.846, | |
| "train_steps_per_second": 0.053 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1271, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |