| { | |
| "best_global_step": 1200, | |
| "best_metric": 0.4862891137599945, | |
| "best_model_checkpoint": "runs/dpo-OpenHermes-2.5-Mistral-7B-20251120-1236/checkpoints/checkpoint-1200", | |
| "epoch": 1.0, | |
| "eval_steps": 100, | |
| "global_step": 1274, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003925417075564278, | |
| "grad_norm": 6.150045871734619, | |
| "learning_rate": 1.9937205651491366e-05, | |
| "logits/chosen": -3.0840773582458496, | |
| "logits/rejected": -3.0958099365234375, | |
| "logps/chosen": -311.7703857421875, | |
| "logps/rejected": -290.61724853515625, | |
| "loss": 0.6805, | |
| "rewards/accuracies": 0.4833333492279053, | |
| "rewards/chosen": 0.13000372052192688, | |
| "rewards/margins": 0.03325975313782692, | |
| "rewards/rejected": 0.09674396365880966, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.007850834151128557, | |
| "grad_norm": 10.357205390930176, | |
| "learning_rate": 1.9858712715855573e-05, | |
| "logits/chosen": -3.0053043365478516, | |
| "logits/rejected": -3.0651307106018066, | |
| "logps/chosen": -339.4639892578125, | |
| "logps/rejected": -330.821533203125, | |
| "loss": 0.6648, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.5165472626686096, | |
| "rewards/margins": 0.10410015285015106, | |
| "rewards/rejected": 0.412447065114975, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.011776251226692836, | |
| "grad_norm": 5.737811088562012, | |
| "learning_rate": 1.9780219780219784e-05, | |
| "logits/chosen": -3.049772262573242, | |
| "logits/rejected": -2.9993741512298584, | |
| "logps/chosen": -339.0422668457031, | |
| "logps/rejected": -313.1036071777344, | |
| "loss": 0.6335, | |
| "rewards/accuracies": 0.6458333730697632, | |
| "rewards/chosen": 0.5449298620223999, | |
| "rewards/margins": 0.23259714245796204, | |
| "rewards/rejected": 0.31233277916908264, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.015701668302257114, | |
| "grad_norm": 5.574727535247803, | |
| "learning_rate": 1.9701726844583988e-05, | |
| "logits/chosen": -3.0041749477386475, | |
| "logits/rejected": -2.9779772758483887, | |
| "logps/chosen": -337.6470642089844, | |
| "logps/rejected": -344.86907958984375, | |
| "loss": 0.5883, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.6041615605354309, | |
| "rewards/margins": 0.3823166787624359, | |
| "rewards/rejected": 0.221844881772995, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.019627085377821395, | |
| "grad_norm": 5.843604564666748, | |
| "learning_rate": 1.9623233908948195e-05, | |
| "logits/chosen": -2.9967105388641357, | |
| "logits/rejected": -3.058979034423828, | |
| "logps/chosen": -295.9526062011719, | |
| "logps/rejected": -288.1315002441406, | |
| "loss": 0.5635, | |
| "rewards/accuracies": 0.7166666984558105, | |
| "rewards/chosen": 0.8546509742736816, | |
| "rewards/margins": 0.5589786767959595, | |
| "rewards/rejected": 0.2956722378730774, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.023552502453385672, | |
| "grad_norm": 5.119436264038086, | |
| "learning_rate": 1.9544740973312402e-05, | |
| "logits/chosen": -3.0148398876190186, | |
| "logits/rejected": -2.98360013961792, | |
| "logps/chosen": -310.6913146972656, | |
| "logps/rejected": -300.73724365234375, | |
| "loss": 0.5576, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 0.528806209564209, | |
| "rewards/margins": 0.6290292739868164, | |
| "rewards/rejected": -0.10022306442260742, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02747791952894995, | |
| "grad_norm": 6.46522855758667, | |
| "learning_rate": 1.9466248037676613e-05, | |
| "logits/chosen": -2.9573066234588623, | |
| "logits/rejected": -3.0088870525360107, | |
| "logps/chosen": -308.3896789550781, | |
| "logps/rejected": -297.2813415527344, | |
| "loss": 0.5858, | |
| "rewards/accuracies": 0.6541667580604553, | |
| "rewards/chosen": 0.3321291506290436, | |
| "rewards/margins": 0.6073407530784607, | |
| "rewards/rejected": -0.2752116024494171, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.03140333660451423, | |
| "grad_norm": 5.806535720825195, | |
| "learning_rate": 1.9387755102040817e-05, | |
| "logits/chosen": -2.980921745300293, | |
| "logits/rejected": -3.0631861686706543, | |
| "logps/chosen": -328.13287353515625, | |
| "logps/rejected": -307.7826232910156, | |
| "loss": 0.6097, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.26903384923934937, | |
| "rewards/margins": 0.5272954106330872, | |
| "rewards/rejected": -0.2582615911960602, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03532875368007851, | |
| "grad_norm": 5.124495506286621, | |
| "learning_rate": 1.9309262166405024e-05, | |
| "logits/chosen": -3.0031769275665283, | |
| "logits/rejected": -3.003542900085449, | |
| "logps/chosen": -307.12860107421875, | |
| "logps/rejected": -323.902099609375, | |
| "loss": 0.5483, | |
| "rewards/accuracies": 0.6833333373069763, | |
| "rewards/chosen": -0.10443999618291855, | |
| "rewards/margins": 0.6921336650848389, | |
| "rewards/rejected": -0.7965737581253052, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.03925417075564279, | |
| "grad_norm": 5.090153217315674, | |
| "learning_rate": 1.923076923076923e-05, | |
| "logits/chosen": -3.009328842163086, | |
| "logits/rejected": -3.043778657913208, | |
| "logps/chosen": -316.96917724609375, | |
| "logps/rejected": -311.90960693359375, | |
| "loss": 0.5658, | |
| "rewards/accuracies": 0.7041667103767395, | |
| "rewards/chosen": -0.5328065752983093, | |
| "rewards/margins": 0.6153702139854431, | |
| "rewards/rejected": -1.148176670074463, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.04317958783120707, | |
| "grad_norm": 5.527870178222656, | |
| "learning_rate": 1.9152276295133442e-05, | |
| "logits/chosen": -3.0257294178009033, | |
| "logits/rejected": -3.0307681560516357, | |
| "logps/chosen": -312.945068359375, | |
| "logps/rejected": -302.48126220703125, | |
| "loss": 0.5258, | |
| "rewards/accuracies": 0.7083333730697632, | |
| "rewards/chosen": -0.4231169819831848, | |
| "rewards/margins": 0.7520371079444885, | |
| "rewards/rejected": -1.1751540899276733, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.047105004906771344, | |
| "grad_norm": 5.95127010345459, | |
| "learning_rate": 1.9073783359497646e-05, | |
| "logits/chosen": -2.9485747814178467, | |
| "logits/rejected": -3.040693998336792, | |
| "logps/chosen": -323.712158203125, | |
| "logps/rejected": -296.09710693359375, | |
| "loss": 0.5437, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.07572325319051743, | |
| "rewards/margins": 0.7711307406425476, | |
| "rewards/rejected": -0.846854031085968, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.05103042198233562, | |
| "grad_norm": 5.20306396484375, | |
| "learning_rate": 1.8995290423861853e-05, | |
| "logits/chosen": -2.932573080062866, | |
| "logits/rejected": -2.9804799556732178, | |
| "logps/chosen": -320.690185546875, | |
| "logps/rejected": -315.3598327636719, | |
| "loss": 0.5424, | |
| "rewards/accuracies": 0.720833420753479, | |
| "rewards/chosen": 0.1321389377117157, | |
| "rewards/margins": 0.7304352521896362, | |
| "rewards/rejected": -0.5982962846755981, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.0549558390578999, | |
| "grad_norm": 6.864138603210449, | |
| "learning_rate": 1.891679748822606e-05, | |
| "logits/chosen": -2.9804160594940186, | |
| "logits/rejected": -3.058073043823242, | |
| "logps/chosen": -302.26055908203125, | |
| "logps/rejected": -324.0291442871094, | |
| "loss": 0.5701, | |
| "rewards/accuracies": 0.6833333969116211, | |
| "rewards/chosen": 0.36459389328956604, | |
| "rewards/margins": 0.7426995038986206, | |
| "rewards/rejected": -0.37810567021369934, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.058881256133464184, | |
| "grad_norm": 4.591891765594482, | |
| "learning_rate": 1.8838304552590268e-05, | |
| "logits/chosen": -2.9886913299560547, | |
| "logits/rejected": -2.987067699432373, | |
| "logps/chosen": -314.6214904785156, | |
| "logps/rejected": -298.33953857421875, | |
| "loss": 0.5124, | |
| "rewards/accuracies": 0.7333333492279053, | |
| "rewards/chosen": 0.3623279631137848, | |
| "rewards/margins": 0.8299944996833801, | |
| "rewards/rejected": -0.4676665663719177, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.06280667320902845, | |
| "grad_norm": 4.877047538757324, | |
| "learning_rate": 1.8759811616954475e-05, | |
| "logits/chosen": -2.989983081817627, | |
| "logits/rejected": -3.0554869174957275, | |
| "logps/chosen": -312.76019287109375, | |
| "logps/rejected": -337.36920166015625, | |
| "loss": 0.5185, | |
| "rewards/accuracies": 0.720833420753479, | |
| "rewards/chosen": 0.21076758205890656, | |
| "rewards/margins": 0.8532983064651489, | |
| "rewards/rejected": -0.6425307393074036, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.06673209028459273, | |
| "grad_norm": 6.023036003112793, | |
| "learning_rate": 1.8681318681318682e-05, | |
| "logits/chosen": -2.9612174034118652, | |
| "logits/rejected": -3.071810722351074, | |
| "logps/chosen": -326.7922058105469, | |
| "logps/rejected": -314.8778076171875, | |
| "loss": 0.5549, | |
| "rewards/accuracies": 0.7083333730697632, | |
| "rewards/chosen": 0.18136966228485107, | |
| "rewards/margins": 0.7432295083999634, | |
| "rewards/rejected": -0.5618598461151123, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.07065750736015702, | |
| "grad_norm": 6.082668781280518, | |
| "learning_rate": 1.860282574568289e-05, | |
| "logits/chosen": -2.832726001739502, | |
| "logits/rejected": -2.891904830932617, | |
| "logps/chosen": -330.4433898925781, | |
| "logps/rejected": -299.0811462402344, | |
| "loss": 0.5603, | |
| "rewards/accuracies": 0.6916667222976685, | |
| "rewards/chosen": 0.12478373199701309, | |
| "rewards/margins": 0.821280300617218, | |
| "rewards/rejected": -0.6964964866638184, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0745829244357213, | |
| "grad_norm": 5.67427396774292, | |
| "learning_rate": 1.8524332810047097e-05, | |
| "logits/chosen": -2.997680902481079, | |
| "logits/rejected": -3.0600008964538574, | |
| "logps/chosen": -325.9932861328125, | |
| "logps/rejected": -297.598388671875, | |
| "loss": 0.5578, | |
| "rewards/accuracies": 0.7000000476837158, | |
| "rewards/chosen": 0.11321593821048737, | |
| "rewards/margins": 0.7891088724136353, | |
| "rewards/rejected": -0.6758929491043091, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.07850834151128558, | |
| "grad_norm": 5.028480052947998, | |
| "learning_rate": 1.8445839874411304e-05, | |
| "logits/chosen": -2.9710261821746826, | |
| "logits/rejected": -2.934241771697998, | |
| "logps/chosen": -332.2840881347656, | |
| "logps/rejected": -323.45794677734375, | |
| "loss": 0.5515, | |
| "rewards/accuracies": 0.7416667342185974, | |
| "rewards/chosen": -0.20720729231834412, | |
| "rewards/margins": 0.7714598774909973, | |
| "rewards/rejected": -0.9786672592163086, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07850834151128558, | |
| "eval_logits/chosen": -2.980896234512329, | |
| "eval_logits/rejected": -3.010272741317749, | |
| "eval_logps/chosen": -332.7633972167969, | |
| "eval_logps/rejected": -323.36285400390625, | |
| "eval_loss": 0.5452204346656799, | |
| "eval_rewards/accuracies": 0.7120000123977661, | |
| "eval_rewards/chosen": -0.3399922549724579, | |
| "eval_rewards/margins": 0.7534830570220947, | |
| "eval_rewards/rejected": -1.0934752225875854, | |
| "eval_runtime": 171.9076, | |
| "eval_samples_per_second": 11.634, | |
| "eval_steps_per_second": 5.817, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.08243375858684986, | |
| "grad_norm": 4.673857688903809, | |
| "learning_rate": 1.836734693877551e-05, | |
| "logits/chosen": -2.955043077468872, | |
| "logits/rejected": -3.010183811187744, | |
| "logps/chosen": -360.6026611328125, | |
| "logps/rejected": -354.76043701171875, | |
| "loss": 0.538, | |
| "rewards/accuracies": 0.7500000596046448, | |
| "rewards/chosen": -0.5105575919151306, | |
| "rewards/margins": 0.7706912755966187, | |
| "rewards/rejected": -1.2812488079071045, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.08635917566241413, | |
| "grad_norm": 6.655648231506348, | |
| "learning_rate": 1.828885400313972e-05, | |
| "logits/chosen": -2.9778573513031006, | |
| "logits/rejected": -2.9207446575164795, | |
| "logps/chosen": -323.5428161621094, | |
| "logps/rejected": -342.148681640625, | |
| "loss": 0.5713, | |
| "rewards/accuracies": 0.6958334445953369, | |
| "rewards/chosen": -0.6803138852119446, | |
| "rewards/margins": 0.697836697101593, | |
| "rewards/rejected": -1.3781505823135376, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.09028459273797841, | |
| "grad_norm": 5.369375228881836, | |
| "learning_rate": 1.8210361067503926e-05, | |
| "logits/chosen": -2.9552626609802246, | |
| "logits/rejected": -3.075476884841919, | |
| "logps/chosen": -348.5240173339844, | |
| "logps/rejected": -307.6527404785156, | |
| "loss": 0.5155, | |
| "rewards/accuracies": 0.7583333849906921, | |
| "rewards/chosen": -0.3947049677371979, | |
| "rewards/margins": 0.8081506490707397, | |
| "rewards/rejected": -1.2028557062149048, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.09421000981354269, | |
| "grad_norm": 4.998170852661133, | |
| "learning_rate": 1.8131868131868133e-05, | |
| "logits/chosen": -2.8421874046325684, | |
| "logits/rejected": -2.9301371574401855, | |
| "logps/chosen": -320.47760009765625, | |
| "logps/rejected": -307.7643737792969, | |
| "loss": 0.499, | |
| "rewards/accuracies": 0.7541667222976685, | |
| "rewards/chosen": -0.32407405972480774, | |
| "rewards/margins": 0.8497726321220398, | |
| "rewards/rejected": -1.1738468408584595, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.09813542688910697, | |
| "grad_norm": 6.9904890060424805, | |
| "learning_rate": 1.805337519623234e-05, | |
| "logits/chosen": -2.96341872215271, | |
| "logits/rejected": -2.9047999382019043, | |
| "logps/chosen": -350.89495849609375, | |
| "logps/rejected": -367.870361328125, | |
| "loss": 0.5847, | |
| "rewards/accuracies": 0.6958333849906921, | |
| "rewards/chosen": -0.3227779269218445, | |
| "rewards/margins": 0.7198423147201538, | |
| "rewards/rejected": -1.0426201820373535, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.10206084396467124, | |
| "grad_norm": 5.14391565322876, | |
| "learning_rate": 1.7974882260596548e-05, | |
| "logits/chosen": -3.0105910301208496, | |
| "logits/rejected": -3.0620298385620117, | |
| "logps/chosen": -341.6379089355469, | |
| "logps/rejected": -306.1999816894531, | |
| "loss": 0.5432, | |
| "rewards/accuracies": 0.7291666865348816, | |
| "rewards/chosen": -0.4734025001525879, | |
| "rewards/margins": 0.7205262184143066, | |
| "rewards/rejected": -1.1939287185668945, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.10598626104023552, | |
| "grad_norm": 5.304475784301758, | |
| "learning_rate": 1.7896389324960755e-05, | |
| "logits/chosen": -2.86970853805542, | |
| "logits/rejected": -2.8937735557556152, | |
| "logps/chosen": -313.49835205078125, | |
| "logps/rejected": -306.93585205078125, | |
| "loss": 0.5462, | |
| "rewards/accuracies": 0.7125000357627869, | |
| "rewards/chosen": -0.5461211800575256, | |
| "rewards/margins": 0.748654305934906, | |
| "rewards/rejected": -1.294775366783142, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.1099116781157998, | |
| "grad_norm": 4.115116119384766, | |
| "learning_rate": 1.7817896389324962e-05, | |
| "logits/chosen": -3.0203137397766113, | |
| "logits/rejected": -3.0821175575256348, | |
| "logps/chosen": -354.2901306152344, | |
| "logps/rejected": -316.67205810546875, | |
| "loss": 0.5105, | |
| "rewards/accuracies": 0.720833420753479, | |
| "rewards/chosen": -0.45508939027786255, | |
| "rewards/margins": 0.8571721911430359, | |
| "rewards/rejected": -1.3122615814208984, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.11383709519136408, | |
| "grad_norm": 5.043489456176758, | |
| "learning_rate": 1.773940345368917e-05, | |
| "logits/chosen": -2.8955276012420654, | |
| "logits/rejected": -2.957524061203003, | |
| "logps/chosen": -318.019775390625, | |
| "logps/rejected": -321.0815124511719, | |
| "loss": 0.5172, | |
| "rewards/accuracies": 0.7666667699813843, | |
| "rewards/chosen": -0.375745952129364, | |
| "rewards/margins": 0.8528572916984558, | |
| "rewards/rejected": -1.2286031246185303, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.11776251226692837, | |
| "grad_norm": 6.230247497558594, | |
| "learning_rate": 1.7660910518053377e-05, | |
| "logits/chosen": -2.986295700073242, | |
| "logits/rejected": -3.0270934104919434, | |
| "logps/chosen": -348.29058837890625, | |
| "logps/rejected": -330.9169006347656, | |
| "loss": 0.5398, | |
| "rewards/accuracies": 0.720833420753479, | |
| "rewards/chosen": -0.6200595498085022, | |
| "rewards/margins": 0.8251369595527649, | |
| "rewards/rejected": -1.4451963901519775, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.12168792934249265, | |
| "grad_norm": 5.081642150878906, | |
| "learning_rate": 1.7582417582417584e-05, | |
| "logits/chosen": -2.935692548751831, | |
| "logits/rejected": -3.0154006481170654, | |
| "logps/chosen": -348.2768249511719, | |
| "logps/rejected": -316.89361572265625, | |
| "loss": 0.556, | |
| "rewards/accuracies": 0.7125000357627869, | |
| "rewards/chosen": -0.7520820498466492, | |
| "rewards/margins": 0.8174117207527161, | |
| "rewards/rejected": -1.5694936513900757, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.1256133464180569, | |
| "grad_norm": 5.332559585571289, | |
| "learning_rate": 1.750392464678179e-05, | |
| "logits/chosen": -2.853811740875244, | |
| "logits/rejected": -2.8885927200317383, | |
| "logps/chosen": -339.7810363769531, | |
| "logps/rejected": -333.8861999511719, | |
| "loss": 0.5375, | |
| "rewards/accuracies": 0.7416666746139526, | |
| "rewards/chosen": -0.8646346926689148, | |
| "rewards/margins": 0.8464757800102234, | |
| "rewards/rejected": -1.7111107110977173, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1295387634936212, | |
| "grad_norm": 5.659219741821289, | |
| "learning_rate": 1.7425431711146e-05, | |
| "logits/chosen": -2.9798367023468018, | |
| "logits/rejected": -3.0234591960906982, | |
| "logps/chosen": -369.3193054199219, | |
| "logps/rejected": -320.97088623046875, | |
| "loss": 0.5365, | |
| "rewards/accuracies": 0.7333333492279053, | |
| "rewards/chosen": -0.5549585223197937, | |
| "rewards/margins": 0.8239587545394897, | |
| "rewards/rejected": -1.3789172172546387, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.13346418056918546, | |
| "grad_norm": 6.147792339324951, | |
| "learning_rate": 1.7346938775510206e-05, | |
| "logits/chosen": -2.9105169773101807, | |
| "logits/rejected": -2.9818620681762695, | |
| "logps/chosen": -348.80194091796875, | |
| "logps/rejected": -312.60272216796875, | |
| "loss": 0.5357, | |
| "rewards/accuracies": 0.7500000596046448, | |
| "rewards/chosen": -0.12175627052783966, | |
| "rewards/margins": 0.814881443977356, | |
| "rewards/rejected": -0.9366377592086792, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.13738959764474976, | |
| "grad_norm": 5.922034740447998, | |
| "learning_rate": 1.7268445839874413e-05, | |
| "logits/chosen": -2.961211919784546, | |
| "logits/rejected": -3.006511688232422, | |
| "logps/chosen": -299.61260986328125, | |
| "logps/rejected": -318.53961181640625, | |
| "loss": 0.5611, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.25987696647644043, | |
| "rewards/margins": 0.7301396727561951, | |
| "rewards/rejected": -0.47026267647743225, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.14131501472031405, | |
| "grad_norm": 5.806030750274658, | |
| "learning_rate": 1.718995290423862e-05, | |
| "logits/chosen": -2.8726003170013428, | |
| "logits/rejected": -2.9730982780456543, | |
| "logps/chosen": -335.31817626953125, | |
| "logps/rejected": -330.07611083984375, | |
| "loss": 0.4908, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": 0.5076613426208496, | |
| "rewards/margins": 0.9330441355705261, | |
| "rewards/rejected": -0.4253828525543213, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1452404317958783, | |
| "grad_norm": 5.604033470153809, | |
| "learning_rate": 1.7111459968602827e-05, | |
| "logits/chosen": -3.0221714973449707, | |
| "logits/rejected": -3.0379929542541504, | |
| "logps/chosen": -299.20281982421875, | |
| "logps/rejected": -292.1725158691406, | |
| "loss": 0.5898, | |
| "rewards/accuracies": 0.6750000715255737, | |
| "rewards/chosen": 0.12294058501720428, | |
| "rewards/margins": 0.6319655179977417, | |
| "rewards/rejected": -0.5090248584747314, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.1491658488714426, | |
| "grad_norm": 5.10993766784668, | |
| "learning_rate": 1.7032967032967035e-05, | |
| "logits/chosen": -2.916008949279785, | |
| "logits/rejected": -2.984748601913452, | |
| "logps/chosen": -313.1849365234375, | |
| "logps/rejected": -306.8742370605469, | |
| "loss": 0.5154, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.03891729563474655, | |
| "rewards/margins": 0.8485898971557617, | |
| "rewards/rejected": -0.8096725344657898, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.15309126594700687, | |
| "grad_norm": 4.921535015106201, | |
| "learning_rate": 1.6954474097331242e-05, | |
| "logits/chosen": -3.0675573348999023, | |
| "logits/rejected": -2.9610018730163574, | |
| "logps/chosen": -333.9068908691406, | |
| "logps/rejected": -316.9395446777344, | |
| "loss": 0.524, | |
| "rewards/accuracies": 0.7250000834465027, | |
| "rewards/chosen": -0.31782767176628113, | |
| "rewards/margins": 0.7863305807113647, | |
| "rewards/rejected": -1.1041581630706787, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.15701668302257116, | |
| "grad_norm": 5.64870548248291, | |
| "learning_rate": 1.687598116169545e-05, | |
| "logits/chosen": -2.9307010173797607, | |
| "logits/rejected": -2.9969723224639893, | |
| "logps/chosen": -315.3205871582031, | |
| "logps/rejected": -319.23876953125, | |
| "loss": 0.5485, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.5450848937034607, | |
| "rewards/margins": 0.7509050965309143, | |
| "rewards/rejected": -1.295989990234375, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.15701668302257116, | |
| "eval_logits/chosen": -3.0087897777557373, | |
| "eval_logits/rejected": -3.0380184650421143, | |
| "eval_logps/chosen": -334.6849670410156, | |
| "eval_logps/rejected": -325.60333251953125, | |
| "eval_loss": 0.5298904776573181, | |
| "eval_rewards/accuracies": 0.7354999780654907, | |
| "eval_rewards/chosen": -0.532148003578186, | |
| "eval_rewards/margins": 0.7853737473487854, | |
| "eval_rewards/rejected": -1.3175216913223267, | |
| "eval_runtime": 171.0727, | |
| "eval_samples_per_second": 11.691, | |
| "eval_steps_per_second": 5.845, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.16094210009813542, | |
| "grad_norm": 5.222318172454834, | |
| "learning_rate": 1.6797488226059656e-05, | |
| "logits/chosen": -2.896974563598633, | |
| "logits/rejected": -2.8886332511901855, | |
| "logps/chosen": -343.0540466308594, | |
| "logps/rejected": -330.8460388183594, | |
| "loss": 0.5583, | |
| "rewards/accuracies": 0.6791667342185974, | |
| "rewards/chosen": -0.49637308716773987, | |
| "rewards/margins": 0.8196843266487122, | |
| "rewards/rejected": -1.3160574436187744, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.1648675171736997, | |
| "grad_norm": 4.988500118255615, | |
| "learning_rate": 1.6718995290423864e-05, | |
| "logits/chosen": -3.0786221027374268, | |
| "logits/rejected": -3.0803046226501465, | |
| "logps/chosen": -364.15679931640625, | |
| "logps/rejected": -344.13348388671875, | |
| "loss": 0.5415, | |
| "rewards/accuracies": 0.7083333730697632, | |
| "rewards/chosen": -0.23943662643432617, | |
| "rewards/margins": 0.7990777492523193, | |
| "rewards/rejected": -1.038514494895935, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.16879293424926398, | |
| "grad_norm": 5.479763984680176, | |
| "learning_rate": 1.664050235478807e-05, | |
| "logits/chosen": -2.971986770629883, | |
| "logits/rejected": -2.9459171295166016, | |
| "logps/chosen": -297.9768371582031, | |
| "logps/rejected": -293.61004638671875, | |
| "loss": 0.5346, | |
| "rewards/accuracies": 0.7416666746139526, | |
| "rewards/chosen": 0.14465472102165222, | |
| "rewards/margins": 0.7910071611404419, | |
| "rewards/rejected": -0.6463524103164673, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.17271835132482827, | |
| "grad_norm": 4.327686786651611, | |
| "learning_rate": 1.6562009419152278e-05, | |
| "logits/chosen": -3.0093016624450684, | |
| "logits/rejected": -2.9483301639556885, | |
| "logps/chosen": -312.3398742675781, | |
| "logps/rejected": -321.89990234375, | |
| "loss": 0.5276, | |
| "rewards/accuracies": 0.7416667342185974, | |
| "rewards/chosen": 0.4244857728481293, | |
| "rewards/margins": 0.7893426418304443, | |
| "rewards/rejected": -0.36485689878463745, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.17664376840039253, | |
| "grad_norm": 5.979199409484863, | |
| "learning_rate": 1.6483516483516486e-05, | |
| "logits/chosen": -3.0172836780548096, | |
| "logits/rejected": -3.0143485069274902, | |
| "logps/chosen": -305.2528991699219, | |
| "logps/rejected": -335.7610168457031, | |
| "loss": 0.5053, | |
| "rewards/accuracies": 0.7291666865348816, | |
| "rewards/chosen": 0.4461596608161926, | |
| "rewards/margins": 0.9256842732429504, | |
| "rewards/rejected": -0.47952452301979065, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.18056918547595682, | |
| "grad_norm": 5.040202617645264, | |
| "learning_rate": 1.6405023547880693e-05, | |
| "logits/chosen": -2.9379525184631348, | |
| "logits/rejected": -2.9742045402526855, | |
| "logps/chosen": -293.68841552734375, | |
| "logps/rejected": -288.2654724121094, | |
| "loss": 0.5655, | |
| "rewards/accuracies": 0.6958333253860474, | |
| "rewards/chosen": 0.10342751443386078, | |
| "rewards/margins": 0.7664733529090881, | |
| "rewards/rejected": -0.6630457639694214, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1844946025515211, | |
| "grad_norm": 6.082977294921875, | |
| "learning_rate": 1.63265306122449e-05, | |
| "logits/chosen": -2.972628593444824, | |
| "logits/rejected": -2.9927875995635986, | |
| "logps/chosen": -342.9659118652344, | |
| "logps/rejected": -326.91204833984375, | |
| "loss": 0.589, | |
| "rewards/accuracies": 0.6791666746139526, | |
| "rewards/chosen": -0.3530040681362152, | |
| "rewards/margins": 0.6996762752532959, | |
| "rewards/rejected": -1.0526803731918335, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.18842001962708538, | |
| "grad_norm": 5.5625176429748535, | |
| "learning_rate": 1.6248037676609107e-05, | |
| "logits/chosen": -2.9938926696777344, | |
| "logits/rejected": -3.048494338989258, | |
| "logps/chosen": -365.29718017578125, | |
| "logps/rejected": -342.7457275390625, | |
| "loss": 0.56, | |
| "rewards/accuracies": 0.6916667222976685, | |
| "rewards/chosen": -0.7515178918838501, | |
| "rewards/margins": 0.7500923871994019, | |
| "rewards/rejected": -1.501610279083252, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.19234543670264967, | |
| "grad_norm": 4.691596984863281, | |
| "learning_rate": 1.6169544740973315e-05, | |
| "logits/chosen": -3.0218453407287598, | |
| "logits/rejected": -3.0742886066436768, | |
| "logps/chosen": -317.8963928222656, | |
| "logps/rejected": -302.0952453613281, | |
| "loss": 0.4919, | |
| "rewards/accuracies": 0.7583333253860474, | |
| "rewards/chosen": -0.7345434427261353, | |
| "rewards/margins": 0.9255669713020325, | |
| "rewards/rejected": -1.6601102352142334, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.19627085377821393, | |
| "grad_norm": 6.012603759765625, | |
| "learning_rate": 1.6091051805337522e-05, | |
| "logits/chosen": -2.928499698638916, | |
| "logits/rejected": -3.021066188812256, | |
| "logps/chosen": -358.77154541015625, | |
| "logps/rejected": -351.7578430175781, | |
| "loss": 0.5549, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.6217355728149414, | |
| "rewards/margins": 0.8296122550964355, | |
| "rewards/rejected": -1.4513477087020874, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.20019627085377822, | |
| "grad_norm": 5.680856227874756, | |
| "learning_rate": 1.601255886970173e-05, | |
| "logits/chosen": -2.9057984352111816, | |
| "logits/rejected": -2.9277281761169434, | |
| "logps/chosen": -328.77520751953125, | |
| "logps/rejected": -336.87127685546875, | |
| "loss": 0.5115, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.37844187021255493, | |
| "rewards/margins": 0.9464572668075562, | |
| "rewards/rejected": -1.3248990774154663, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.2041216879293425, | |
| "grad_norm": 5.254044532775879, | |
| "learning_rate": 1.5934065934065933e-05, | |
| "logits/chosen": -2.89184832572937, | |
| "logits/rejected": -2.9895052909851074, | |
| "logps/chosen": -327.46881103515625, | |
| "logps/rejected": -325.6787414550781, | |
| "loss": 0.5054, | |
| "rewards/accuracies": 0.7583333849906921, | |
| "rewards/chosen": -0.12966637313365936, | |
| "rewards/margins": 1.035685658454895, | |
| "rewards/rejected": -1.1653519868850708, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.20804710500490678, | |
| "grad_norm": 5.806418418884277, | |
| "learning_rate": 1.5855572998430144e-05, | |
| "logits/chosen": -2.9994475841522217, | |
| "logits/rejected": -3.0370442867279053, | |
| "logps/chosen": -306.01910400390625, | |
| "logps/rejected": -285.0171203613281, | |
| "loss": 0.5742, | |
| "rewards/accuracies": 0.6958333253860474, | |
| "rewards/chosen": -0.1305859386920929, | |
| "rewards/margins": 0.7369558215141296, | |
| "rewards/rejected": -0.8675416707992554, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.21197252208047104, | |
| "grad_norm": 4.36320686340332, | |
| "learning_rate": 1.577708006279435e-05, | |
| "logits/chosen": -2.8925890922546387, | |
| "logits/rejected": -2.969078540802002, | |
| "logps/chosen": -336.0727233886719, | |
| "logps/rejected": -323.86175537109375, | |
| "loss": 0.4837, | |
| "rewards/accuracies": 0.7791666984558105, | |
| "rewards/chosen": -0.15950943529605865, | |
| "rewards/margins": 0.9283342361450195, | |
| "rewards/rejected": -1.087843656539917, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.21589793915603533, | |
| "grad_norm": 4.775585174560547, | |
| "learning_rate": 1.5698587127158558e-05, | |
| "logits/chosen": -2.9799633026123047, | |
| "logits/rejected": -2.998260736465454, | |
| "logps/chosen": -339.5887756347656, | |
| "logps/rejected": -322.0613708496094, | |
| "loss": 0.4891, | |
| "rewards/accuracies": 0.7458333969116211, | |
| "rewards/chosen": -0.2023775577545166, | |
| "rewards/margins": 0.9467275738716125, | |
| "rewards/rejected": -1.1491053104400635, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.2198233562315996, | |
| "grad_norm": 5.527752876281738, | |
| "learning_rate": 1.5620094191522762e-05, | |
| "logits/chosen": -3.0590403079986572, | |
| "logits/rejected": -3.1203842163085938, | |
| "logps/chosen": -326.7115478515625, | |
| "logps/rejected": -302.5054016113281, | |
| "loss": 0.5579, | |
| "rewards/accuracies": 0.6791666746139526, | |
| "rewards/chosen": -0.37672197818756104, | |
| "rewards/margins": 0.8057243227958679, | |
| "rewards/rejected": -1.1824463605880737, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2237487733071639, | |
| "grad_norm": 4.813480377197266, | |
| "learning_rate": 1.5541601255886973e-05, | |
| "logits/chosen": -3.0462276935577393, | |
| "logits/rejected": -3.101527690887451, | |
| "logps/chosen": -325.5246276855469, | |
| "logps/rejected": -336.7899475097656, | |
| "loss": 0.5218, | |
| "rewards/accuracies": 0.7375000715255737, | |
| "rewards/chosen": -0.45412200689315796, | |
| "rewards/margins": 0.7958036661148071, | |
| "rewards/rejected": -1.2499258518218994, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.22767419038272815, | |
| "grad_norm": 4.553357124328613, | |
| "learning_rate": 1.546310832025118e-05, | |
| "logits/chosen": -2.9580142498016357, | |
| "logits/rejected": -2.963024139404297, | |
| "logps/chosen": -342.618896484375, | |
| "logps/rejected": -353.5760803222656, | |
| "loss": 0.5056, | |
| "rewards/accuracies": 0.7458333373069763, | |
| "rewards/chosen": -0.3053968548774719, | |
| "rewards/margins": 0.9428439140319824, | |
| "rewards/rejected": -1.2482408285140991, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.23159960745829244, | |
| "grad_norm": 5.060155391693115, | |
| "learning_rate": 1.5384615384615387e-05, | |
| "logits/chosen": -2.9472720623016357, | |
| "logits/rejected": -3.016634702682495, | |
| "logps/chosen": -344.4608154296875, | |
| "logps/rejected": -318.6928405761719, | |
| "loss": 0.4181, | |
| "rewards/accuracies": 0.79583340883255, | |
| "rewards/chosen": -0.15565678477287292, | |
| "rewards/margins": 1.2021899223327637, | |
| "rewards/rejected": -1.3578466176986694, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.23552502453385674, | |
| "grad_norm": 3.455629348754883, | |
| "learning_rate": 1.530612244897959e-05, | |
| "logits/chosen": -2.9951541423797607, | |
| "logits/rejected": -3.020198106765747, | |
| "logps/chosen": -308.458251953125, | |
| "logps/rejected": -329.9710998535156, | |
| "loss": 0.4599, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.03096728026866913, | |
| "rewards/margins": 1.1172107458114624, | |
| "rewards/rejected": -1.148177981376648, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.23552502453385674, | |
| "eval_logits/chosen": -3.0017831325531006, | |
| "eval_logits/rejected": -3.031320333480835, | |
| "eval_logps/chosen": -331.4214782714844, | |
| "eval_logps/rejected": -324.1851806640625, | |
| "eval_loss": 0.5177174210548401, | |
| "eval_rewards/accuracies": 0.7275000214576721, | |
| "eval_rewards/chosen": -0.20579998195171356, | |
| "eval_rewards/margins": 0.9699056148529053, | |
| "eval_rewards/rejected": -1.1757057905197144, | |
| "eval_runtime": 170.7357, | |
| "eval_samples_per_second": 11.714, | |
| "eval_steps_per_second": 5.857, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.239450441609421, | |
| "grad_norm": 5.653757572174072, | |
| "learning_rate": 1.52276295133438e-05, | |
| "logits/chosen": -2.964921236038208, | |
| "logits/rejected": -2.9607841968536377, | |
| "logps/chosen": -355.35760498046875, | |
| "logps/rejected": -382.9061584472656, | |
| "loss": 0.5153, | |
| "rewards/accuracies": 0.720833420753479, | |
| "rewards/chosen": -0.141874298453331, | |
| "rewards/margins": 1.0537707805633545, | |
| "rewards/rejected": -1.1956450939178467, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.2433758586849853, | |
| "grad_norm": 5.022225856781006, | |
| "learning_rate": 1.5149136577708007e-05, | |
| "logits/chosen": -2.906406879425049, | |
| "logits/rejected": -3.0163021087646484, | |
| "logps/chosen": -315.5581970214844, | |
| "logps/rejected": -309.3556213378906, | |
| "loss": 0.5086, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.02891678549349308, | |
| "rewards/margins": 1.1372863054275513, | |
| "rewards/rejected": -1.1662030220031738, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.24730127576054955, | |
| "grad_norm": 4.488466739654541, | |
| "learning_rate": 1.5070643642072216e-05, | |
| "logits/chosen": -3.0036263465881348, | |
| "logits/rejected": -3.0129470825195312, | |
| "logps/chosen": -334.03955078125, | |
| "logps/rejected": -307.4307861328125, | |
| "loss": 0.5034, | |
| "rewards/accuracies": 0.7458333373069763, | |
| "rewards/chosen": -0.05140721797943115, | |
| "rewards/margins": 1.0299065113067627, | |
| "rewards/rejected": -1.0813137292861938, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.2512266928361138, | |
| "grad_norm": 6.025671482086182, | |
| "learning_rate": 1.4992150706436422e-05, | |
| "logits/chosen": -3.0598220825195312, | |
| "logits/rejected": -2.9974725246429443, | |
| "logps/chosen": -336.5476989746094, | |
| "logps/rejected": -314.2373046875, | |
| "loss": 0.5272, | |
| "rewards/accuracies": 0.7416667342185974, | |
| "rewards/chosen": -0.0364832878112793, | |
| "rewards/margins": 0.9740719795227051, | |
| "rewards/rejected": -1.010555386543274, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.25515210991167814, | |
| "grad_norm": 3.7678122520446777, | |
| "learning_rate": 1.4913657770800629e-05, | |
| "logits/chosen": -3.0125625133514404, | |
| "logits/rejected": -3.0665228366851807, | |
| "logps/chosen": -373.77386474609375, | |
| "logps/rejected": -362.3180847167969, | |
| "loss": 0.4491, | |
| "rewards/accuracies": 0.7833333611488342, | |
| "rewards/chosen": -0.11678016185760498, | |
| "rewards/margins": 1.1747580766677856, | |
| "rewards/rejected": -1.291538119316101, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.2590775269872424, | |
| "grad_norm": 6.283595085144043, | |
| "learning_rate": 1.4835164835164836e-05, | |
| "logits/chosen": -2.9614596366882324, | |
| "logits/rejected": -3.0732309818267822, | |
| "logps/chosen": -340.6827392578125, | |
| "logps/rejected": -330.89178466796875, | |
| "loss": 0.5298, | |
| "rewards/accuracies": 0.7583334445953369, | |
| "rewards/chosen": -0.27896976470947266, | |
| "rewards/margins": 1.1068425178527832, | |
| "rewards/rejected": -1.3858124017715454, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.26300294406280667, | |
| "grad_norm": 5.159917831420898, | |
| "learning_rate": 1.4756671899529042e-05, | |
| "logits/chosen": -2.9882471561431885, | |
| "logits/rejected": -2.958286762237549, | |
| "logps/chosen": -304.15899658203125, | |
| "logps/rejected": -337.4718933105469, | |
| "loss": 0.5442, | |
| "rewards/accuracies": 0.7416666746139526, | |
| "rewards/chosen": -0.5084502100944519, | |
| "rewards/margins": 0.826800525188446, | |
| "rewards/rejected": -1.3352506160736084, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.26692836113837093, | |
| "grad_norm": 5.694521427154541, | |
| "learning_rate": 1.467817896389325e-05, | |
| "logits/chosen": -3.0913736820220947, | |
| "logits/rejected": -3.1289680004119873, | |
| "logps/chosen": -306.4797668457031, | |
| "logps/rejected": -315.7909851074219, | |
| "loss": 0.5478, | |
| "rewards/accuracies": 0.708333432674408, | |
| "rewards/chosen": -0.5964398384094238, | |
| "rewards/margins": 0.8492003679275513, | |
| "rewards/rejected": -1.4456400871276855, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.27085377821393525, | |
| "grad_norm": 4.4798665046691895, | |
| "learning_rate": 1.4599686028257458e-05, | |
| "logits/chosen": -2.967850923538208, | |
| "logits/rejected": -3.0410642623901367, | |
| "logps/chosen": -341.167236328125, | |
| "logps/rejected": -345.0897216796875, | |
| "loss": 0.491, | |
| "rewards/accuracies": 0.7541667222976685, | |
| "rewards/chosen": -0.3792761266231537, | |
| "rewards/margins": 1.023285150527954, | |
| "rewards/rejected": -1.4025614261627197, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.2747791952894995, | |
| "grad_norm": 4.080179214477539, | |
| "learning_rate": 1.4521193092621665e-05, | |
| "logits/chosen": -3.078789234161377, | |
| "logits/rejected": -3.0680148601531982, | |
| "logps/chosen": -342.5566711425781, | |
| "logps/rejected": -333.19732666015625, | |
| "loss": 0.4792, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.26053228974342346, | |
| "rewards/margins": 1.0307527780532837, | |
| "rewards/rejected": -1.2912850379943848, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2787046123650638, | |
| "grad_norm": 6.406314849853516, | |
| "learning_rate": 1.4442700156985871e-05, | |
| "logits/chosen": -2.993866443634033, | |
| "logits/rejected": -3.0816330909729004, | |
| "logps/chosen": -325.2431640625, | |
| "logps/rejected": -306.7488708496094, | |
| "loss": 0.5484, | |
| "rewards/accuracies": 0.7291666865348816, | |
| "rewards/chosen": -0.2325890064239502, | |
| "rewards/margins": 0.7650774121284485, | |
| "rewards/rejected": -0.9976664781570435, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.2826300294406281, | |
| "grad_norm": 4.517834663391113, | |
| "learning_rate": 1.436420722135008e-05, | |
| "logits/chosen": -2.9789466857910156, | |
| "logits/rejected": -2.9082839488983154, | |
| "logps/chosen": -326.4145202636719, | |
| "logps/rejected": -335.26019287109375, | |
| "loss": 0.4693, | |
| "rewards/accuracies": 0.7541667222976685, | |
| "rewards/chosen": -0.11003933846950531, | |
| "rewards/margins": 0.9707590341567993, | |
| "rewards/rejected": -1.0807983875274658, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.28655544651619236, | |
| "grad_norm": 3.715141534805298, | |
| "learning_rate": 1.4285714285714287e-05, | |
| "logits/chosen": -2.981189250946045, | |
| "logits/rejected": -3.0649161338806152, | |
| "logps/chosen": -307.39764404296875, | |
| "logps/rejected": -301.6369934082031, | |
| "loss": 0.481, | |
| "rewards/accuracies": 0.7791666984558105, | |
| "rewards/chosen": -0.12946780025959015, | |
| "rewards/margins": 1.1251821517944336, | |
| "rewards/rejected": -1.2546498775482178, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.2904808635917566, | |
| "grad_norm": 3.333448648452759, | |
| "learning_rate": 1.4207221350078494e-05, | |
| "logits/chosen": -2.984611988067627, | |
| "logits/rejected": -3.030081272125244, | |
| "logps/chosen": -309.75335693359375, | |
| "logps/rejected": -308.0655517578125, | |
| "loss": 0.4995, | |
| "rewards/accuracies": 0.7666667103767395, | |
| "rewards/chosen": -0.0332108810544014, | |
| "rewards/margins": 1.0196669101715088, | |
| "rewards/rejected": -1.0528777837753296, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.2944062806673209, | |
| "grad_norm": 5.067115306854248, | |
| "learning_rate": 1.41287284144427e-05, | |
| "logits/chosen": -3.0295064449310303, | |
| "logits/rejected": -3.0551486015319824, | |
| "logps/chosen": -306.81561279296875, | |
| "logps/rejected": -293.89971923828125, | |
| "loss": 0.5708, | |
| "rewards/accuracies": 0.6833333373069763, | |
| "rewards/chosen": -0.12991970777511597, | |
| "rewards/margins": 0.8558316230773926, | |
| "rewards/rejected": -0.9857513308525085, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.2983316977428852, | |
| "grad_norm": 4.063174247741699, | |
| "learning_rate": 1.4050235478806909e-05, | |
| "logits/chosen": -2.9749984741210938, | |
| "logits/rejected": -3.0381150245666504, | |
| "logps/chosen": -374.1710510253906, | |
| "logps/rejected": -350.29168701171875, | |
| "loss": 0.5021, | |
| "rewards/accuracies": 0.7541667222976685, | |
| "rewards/chosen": -0.11557143926620483, | |
| "rewards/margins": 1.067694067955017, | |
| "rewards/rejected": -1.1832655668258667, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.30225711481844947, | |
| "grad_norm": 4.8487396240234375, | |
| "learning_rate": 1.3971742543171116e-05, | |
| "logits/chosen": -2.9603800773620605, | |
| "logits/rejected": -3.019071102142334, | |
| "logps/chosen": -311.37628173828125, | |
| "logps/rejected": -320.9827575683594, | |
| "loss": 0.4274, | |
| "rewards/accuracies": 0.8166667222976685, | |
| "rewards/chosen": -0.017604345455765724, | |
| "rewards/margins": 1.2022392749786377, | |
| "rewards/rejected": -1.2198436260223389, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.30618253189401373, | |
| "grad_norm": 4.178959846496582, | |
| "learning_rate": 1.3893249607535323e-05, | |
| "logits/chosen": -3.040865421295166, | |
| "logits/rejected": -3.0666539669036865, | |
| "logps/chosen": -327.70928955078125, | |
| "logps/rejected": -322.9264221191406, | |
| "loss": 0.4558, | |
| "rewards/accuracies": 0.7750000953674316, | |
| "rewards/chosen": 0.11298879235982895, | |
| "rewards/margins": 1.198480248451233, | |
| "rewards/rejected": -1.085491418838501, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.310107948969578, | |
| "grad_norm": 4.6533122062683105, | |
| "learning_rate": 1.3814756671899529e-05, | |
| "logits/chosen": -2.9745497703552246, | |
| "logits/rejected": -3.0309340953826904, | |
| "logps/chosen": -330.2334289550781, | |
| "logps/rejected": -340.81536865234375, | |
| "loss": 0.5296, | |
| "rewards/accuracies": 0.7041667103767395, | |
| "rewards/chosen": -0.015470663085579872, | |
| "rewards/margins": 1.0612616539001465, | |
| "rewards/rejected": -1.0767322778701782, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.3140333660451423, | |
| "grad_norm": 5.215358734130859, | |
| "learning_rate": 1.3736263736263738e-05, | |
| "logits/chosen": -3.002040147781372, | |
| "logits/rejected": -3.0286686420440674, | |
| "logps/chosen": -353.7515563964844, | |
| "logps/rejected": -327.62030029296875, | |
| "loss": 0.5766, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.2245529592037201, | |
| "rewards/margins": 0.8832836151123047, | |
| "rewards/rejected": -1.1078366041183472, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3140333660451423, | |
| "eval_logits/chosen": -3.016608953475952, | |
| "eval_logits/rejected": -3.0451409816741943, | |
| "eval_logps/chosen": -331.353271484375, | |
| "eval_logps/rejected": -324.7242736816406, | |
| "eval_loss": 0.5065792202949524, | |
| "eval_rewards/accuracies": 0.7444999814033508, | |
| "eval_rewards/chosen": -0.19897931814193726, | |
| "eval_rewards/margins": 1.030638337135315, | |
| "eval_rewards/rejected": -1.2296175956726074, | |
| "eval_runtime": 172.2901, | |
| "eval_samples_per_second": 11.608, | |
| "eval_steps_per_second": 5.804, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3179587831207066, | |
| "grad_norm": 3.946781635284424, | |
| "learning_rate": 1.3657770800627945e-05, | |
| "logits/chosen": -2.966956853866577, | |
| "logits/rejected": -3.056098461151123, | |
| "logps/chosen": -356.13909912109375, | |
| "logps/rejected": -317.91571044921875, | |
| "loss": 0.4387, | |
| "rewards/accuracies": 0.7791666984558105, | |
| "rewards/chosen": -0.06729185581207275, | |
| "rewards/margins": 1.262393832206726, | |
| "rewards/rejected": -1.3296858072280884, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.32188420019627084, | |
| "grad_norm": 5.394034385681152, | |
| "learning_rate": 1.357927786499215e-05, | |
| "logits/chosen": -3.05330491065979, | |
| "logits/rejected": -3.038001775741577, | |
| "logps/chosen": -359.85101318359375, | |
| "logps/rejected": -340.84295654296875, | |
| "loss": 0.5456, | |
| "rewards/accuracies": 0.7458333969116211, | |
| "rewards/chosen": -0.33758872747421265, | |
| "rewards/margins": 0.9710124731063843, | |
| "rewards/rejected": -1.3086011409759521, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.3258096172718351, | |
| "grad_norm": 5.074666976928711, | |
| "learning_rate": 1.3500784929356358e-05, | |
| "logits/chosen": -3.051016092300415, | |
| "logits/rejected": -3.0877463817596436, | |
| "logps/chosen": -344.28314208984375, | |
| "logps/rejected": -312.5767822265625, | |
| "loss": 0.5506, | |
| "rewards/accuracies": 0.7166666984558105, | |
| "rewards/chosen": -0.33664119243621826, | |
| "rewards/margins": 0.9169348478317261, | |
| "rewards/rejected": -1.2535761594772339, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.3297350343473994, | |
| "grad_norm": 6.106690883636475, | |
| "learning_rate": 1.3422291993720567e-05, | |
| "logits/chosen": -3.0146520137786865, | |
| "logits/rejected": -3.0691580772399902, | |
| "logps/chosen": -326.73046875, | |
| "logps/rejected": -341.7567138671875, | |
| "loss": 0.5092, | |
| "rewards/accuracies": 0.7416666746139526, | |
| "rewards/chosen": -0.4858369827270508, | |
| "rewards/margins": 1.0325465202331543, | |
| "rewards/rejected": -1.5183833837509155, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.3336604514229637, | |
| "grad_norm": 4.349857330322266, | |
| "learning_rate": 1.3343799058084774e-05, | |
| "logits/chosen": -3.0472264289855957, | |
| "logits/rejected": -3.1090333461761475, | |
| "logps/chosen": -336.30706787109375, | |
| "logps/rejected": -323.3406982421875, | |
| "loss": 0.4795, | |
| "rewards/accuracies": 0.7666667103767395, | |
| "rewards/chosen": -0.33309873938560486, | |
| "rewards/margins": 1.1322623491287231, | |
| "rewards/rejected": -1.46536123752594, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.33758586849852795, | |
| "grad_norm": 4.5804219245910645, | |
| "learning_rate": 1.326530612244898e-05, | |
| "logits/chosen": -2.928574323654175, | |
| "logits/rejected": -3.0479984283447266, | |
| "logps/chosen": -346.50762939453125, | |
| "logps/rejected": -323.5039978027344, | |
| "loss": 0.4464, | |
| "rewards/accuracies": 0.7750000357627869, | |
| "rewards/chosen": -0.41089972853660583, | |
| "rewards/margins": 1.1351535320281982, | |
| "rewards/rejected": -1.546053409576416, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.34151128557409227, | |
| "grad_norm": 4.321537494659424, | |
| "learning_rate": 1.3186813186813187e-05, | |
| "logits/chosen": -3.0598042011260986, | |
| "logits/rejected": -3.1074109077453613, | |
| "logps/chosen": -331.7185363769531, | |
| "logps/rejected": -315.3821716308594, | |
| "loss": 0.5302, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.3797026574611664, | |
| "rewards/margins": 0.9000622034072876, | |
| "rewards/rejected": -1.2797647714614868, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.34543670264965654, | |
| "grad_norm": 5.391077041625977, | |
| "learning_rate": 1.3108320251177396e-05, | |
| "logits/chosen": -3.0739028453826904, | |
| "logits/rejected": -3.085374355316162, | |
| "logps/chosen": -336.9720458984375, | |
| "logps/rejected": -317.68609619140625, | |
| "loss": 0.4884, | |
| "rewards/accuracies": 0.7625001072883606, | |
| "rewards/chosen": 0.12200820446014404, | |
| "rewards/margins": 1.0954450368881226, | |
| "rewards/rejected": -0.973436713218689, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.3493621197252208, | |
| "grad_norm": 4.104001045227051, | |
| "learning_rate": 1.3029827315541603e-05, | |
| "logits/chosen": -3.0037009716033936, | |
| "logits/rejected": -3.0276780128479004, | |
| "logps/chosen": -311.5299377441406, | |
| "logps/rejected": -290.1028137207031, | |
| "loss": 0.5133, | |
| "rewards/accuracies": 0.7291667461395264, | |
| "rewards/chosen": 0.2962660789489746, | |
| "rewards/margins": 1.045003056526184, | |
| "rewards/rejected": -0.7487369775772095, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.35328753680078506, | |
| "grad_norm": 4.049530506134033, | |
| "learning_rate": 1.2951334379905809e-05, | |
| "logits/chosen": -3.0438899993896484, | |
| "logits/rejected": -3.1289470195770264, | |
| "logps/chosen": -312.2237548828125, | |
| "logps/rejected": -289.80206298828125, | |
| "loss": 0.4748, | |
| "rewards/accuracies": 0.783333420753479, | |
| "rewards/chosen": 0.2921372056007385, | |
| "rewards/margins": 1.1328870058059692, | |
| "rewards/rejected": -0.8407497406005859, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.3572129538763494, | |
| "grad_norm": 4.7967753410339355, | |
| "learning_rate": 1.2872841444270016e-05, | |
| "logits/chosen": -3.0214269161224365, | |
| "logits/rejected": -3.032402276992798, | |
| "logps/chosen": -320.35296630859375, | |
| "logps/rejected": -322.9061279296875, | |
| "loss": 0.4614, | |
| "rewards/accuracies": 0.7708333730697632, | |
| "rewards/chosen": 0.3199036717414856, | |
| "rewards/margins": 1.2129650115966797, | |
| "rewards/rejected": -0.8930614590644836, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.36113837095191365, | |
| "grad_norm": 4.228839874267578, | |
| "learning_rate": 1.2794348508634225e-05, | |
| "logits/chosen": -3.0375332832336426, | |
| "logits/rejected": -3.0748536586761475, | |
| "logps/chosen": -338.22369384765625, | |
| "logps/rejected": -323.2271423339844, | |
| "loss": 0.4843, | |
| "rewards/accuracies": 0.7500000596046448, | |
| "rewards/chosen": 0.07565226405858994, | |
| "rewards/margins": 1.1118285655975342, | |
| "rewards/rejected": -1.036176323890686, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.3650637880274779, | |
| "grad_norm": 4.642974376678467, | |
| "learning_rate": 1.271585557299843e-05, | |
| "logits/chosen": -3.008430004119873, | |
| "logits/rejected": -3.081514835357666, | |
| "logps/chosen": -329.63775634765625, | |
| "logps/rejected": -327.0590515136719, | |
| "loss": 0.5383, | |
| "rewards/accuracies": 0.7166666984558105, | |
| "rewards/chosen": -0.1453629583120346, | |
| "rewards/margins": 1.031752347946167, | |
| "rewards/rejected": -1.1771153211593628, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.3689892051030422, | |
| "grad_norm": 5.04410457611084, | |
| "learning_rate": 1.2637362637362638e-05, | |
| "logits/chosen": -3.0850863456726074, | |
| "logits/rejected": -3.0572659969329834, | |
| "logps/chosen": -331.3392028808594, | |
| "logps/rejected": -314.00653076171875, | |
| "loss": 0.4975, | |
| "rewards/accuracies": 0.7083333730697632, | |
| "rewards/chosen": -0.1242925375699997, | |
| "rewards/margins": 1.032974362373352, | |
| "rewards/rejected": -1.1572668552398682, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.3729146221786065, | |
| "grad_norm": 6.009474754333496, | |
| "learning_rate": 1.2558869701726845e-05, | |
| "logits/chosen": -2.9685397148132324, | |
| "logits/rejected": -3.083059787750244, | |
| "logps/chosen": -352.71142578125, | |
| "logps/rejected": -327.12872314453125, | |
| "loss": 0.5339, | |
| "rewards/accuracies": 0.6958333849906921, | |
| "rewards/chosen": -0.14256823062896729, | |
| "rewards/margins": 1.011826753616333, | |
| "rewards/rejected": -1.1543948650360107, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.37684003925417076, | |
| "grad_norm": 5.437267303466797, | |
| "learning_rate": 1.2480376766091054e-05, | |
| "logits/chosen": -3.053178310394287, | |
| "logits/rejected": -3.0942587852478027, | |
| "logps/chosen": -320.6322021484375, | |
| "logps/rejected": -312.03155517578125, | |
| "loss": 0.5107, | |
| "rewards/accuracies": 0.7708333730697632, | |
| "rewards/chosen": -0.028355002403259277, | |
| "rewards/margins": 1.0027117729187012, | |
| "rewards/rejected": -1.031066656112671, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.380765456329735, | |
| "grad_norm": 5.028485298156738, | |
| "learning_rate": 1.240188383045526e-05, | |
| "logits/chosen": -2.875955104827881, | |
| "logits/rejected": -2.964559555053711, | |
| "logps/chosen": -347.6171569824219, | |
| "logps/rejected": -321.8453369140625, | |
| "loss": 0.523, | |
| "rewards/accuracies": 0.7208333611488342, | |
| "rewards/chosen": -0.030794035643339157, | |
| "rewards/margins": 1.1214120388031006, | |
| "rewards/rejected": -1.1522061824798584, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.38469087340529934, | |
| "grad_norm": 4.091563701629639, | |
| "learning_rate": 1.2323390894819467e-05, | |
| "logits/chosen": -2.8418350219726562, | |
| "logits/rejected": -2.8112716674804688, | |
| "logps/chosen": -309.3431091308594, | |
| "logps/rejected": -331.121826171875, | |
| "loss": 0.4678, | |
| "rewards/accuracies": 0.7833333611488342, | |
| "rewards/chosen": 0.0016341328155249357, | |
| "rewards/margins": 1.139491319656372, | |
| "rewards/rejected": -1.13785719871521, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.3886162904808636, | |
| "grad_norm": 4.04428768157959, | |
| "learning_rate": 1.2244897959183674e-05, | |
| "logits/chosen": -3.047217845916748, | |
| "logits/rejected": -3.066889524459839, | |
| "logps/chosen": -317.3092956542969, | |
| "logps/rejected": -299.8882751464844, | |
| "loss": 0.5092, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.00645809480920434, | |
| "rewards/margins": 0.9209384918212891, | |
| "rewards/rejected": -0.9273965954780579, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.39254170755642787, | |
| "grad_norm": 4.938584804534912, | |
| "learning_rate": 1.2166405023547883e-05, | |
| "logits/chosen": -3.029782772064209, | |
| "logits/rejected": -3.041254758834839, | |
| "logps/chosen": -329.04168701171875, | |
| "logps/rejected": -326.0158996582031, | |
| "loss": 0.4689, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 0.10879228264093399, | |
| "rewards/margins": 1.0214567184448242, | |
| "rewards/rejected": -0.9126644134521484, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.39254170755642787, | |
| "eval_logits/chosen": -3.0307250022888184, | |
| "eval_logits/rejected": -3.0587258338928223, | |
| "eval_logps/chosen": -328.3224792480469, | |
| "eval_logps/rejected": -321.09954833984375, | |
| "eval_loss": 0.5021634697914124, | |
| "eval_rewards/accuracies": 0.7450000047683716, | |
| "eval_rewards/chosen": 0.10410188138484955, | |
| "eval_rewards/margins": 0.971247136592865, | |
| "eval_rewards/rejected": -0.8671452403068542, | |
| "eval_runtime": 170.3972, | |
| "eval_samples_per_second": 11.737, | |
| "eval_steps_per_second": 5.869, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.39646712463199213, | |
| "grad_norm": 6.137747287750244, | |
| "learning_rate": 1.2087912087912089e-05, | |
| "logits/chosen": -3.0541741847991943, | |
| "logits/rejected": -3.036529064178467, | |
| "logps/chosen": -306.2962341308594, | |
| "logps/rejected": -317.01666259765625, | |
| "loss": 0.5514, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.02994796633720398, | |
| "rewards/margins": 0.8114150762557983, | |
| "rewards/rejected": -0.7814672589302063, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.40039254170755645, | |
| "grad_norm": 5.219884872436523, | |
| "learning_rate": 1.2009419152276296e-05, | |
| "logits/chosen": -3.1144938468933105, | |
| "logits/rejected": -3.1160566806793213, | |
| "logps/chosen": -329.6716613769531, | |
| "logps/rejected": -319.11407470703125, | |
| "loss": 0.5221, | |
| "rewards/accuracies": 0.7625000476837158, | |
| "rewards/chosen": 0.043878063559532166, | |
| "rewards/margins": 1.0103065967559814, | |
| "rewards/rejected": -0.9664285778999329, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.4043179587831207, | |
| "grad_norm": 4.543951511383057, | |
| "learning_rate": 1.1930926216640503e-05, | |
| "logits/chosen": -2.979218006134033, | |
| "logits/rejected": -2.968158483505249, | |
| "logps/chosen": -286.9298400878906, | |
| "logps/rejected": -313.0599060058594, | |
| "loss": 0.5016, | |
| "rewards/accuracies": 0.7500000596046448, | |
| "rewards/chosen": -0.1237020492553711, | |
| "rewards/margins": 0.9319057464599609, | |
| "rewards/rejected": -1.055607795715332, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.408243375858685, | |
| "grad_norm": 5.305350303649902, | |
| "learning_rate": 1.1852433281004712e-05, | |
| "logits/chosen": -2.945699691772461, | |
| "logits/rejected": -2.975595474243164, | |
| "logps/chosen": -313.67431640625, | |
| "logps/rejected": -317.87261962890625, | |
| "loss": 0.4597, | |
| "rewards/accuracies": 0.7625000476837158, | |
| "rewards/chosen": 0.10118236392736435, | |
| "rewards/margins": 1.1309287548065186, | |
| "rewards/rejected": -1.0297462940216064, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.41216879293424924, | |
| "grad_norm": 4.5991692543029785, | |
| "learning_rate": 1.1773940345368918e-05, | |
| "logits/chosen": -2.9673571586608887, | |
| "logits/rejected": -2.9875621795654297, | |
| "logps/chosen": -318.8524475097656, | |
| "logps/rejected": -301.4873046875, | |
| "loss": 0.4501, | |
| "rewards/accuracies": 0.79583340883255, | |
| "rewards/chosen": -0.11968664824962616, | |
| "rewards/margins": 1.2507131099700928, | |
| "rewards/rejected": -1.370399832725525, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.41609421000981356, | |
| "grad_norm": 4.974186420440674, | |
| "learning_rate": 1.1695447409733125e-05, | |
| "logits/chosen": -2.9936716556549072, | |
| "logits/rejected": -3.038217544555664, | |
| "logps/chosen": -349.4075927734375, | |
| "logps/rejected": -314.17413330078125, | |
| "loss": 0.5153, | |
| "rewards/accuracies": 0.7583333253860474, | |
| "rewards/chosen": -0.30208900570869446, | |
| "rewards/margins": 1.0824108123779297, | |
| "rewards/rejected": -1.3844999074935913, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.4200196270853778, | |
| "grad_norm": 4.36262845993042, | |
| "learning_rate": 1.1616954474097332e-05, | |
| "logits/chosen": -3.0161938667297363, | |
| "logits/rejected": -3.0261764526367188, | |
| "logps/chosen": -342.5355529785156, | |
| "logps/rejected": -355.38604736328125, | |
| "loss": 0.4833, | |
| "rewards/accuracies": 0.7541667222976685, | |
| "rewards/chosen": -0.37566858530044556, | |
| "rewards/margins": 1.1315479278564453, | |
| "rewards/rejected": -1.5072165727615356, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.4239450441609421, | |
| "grad_norm": 4.01798677444458, | |
| "learning_rate": 1.1538461538461538e-05, | |
| "logits/chosen": -3.040135145187378, | |
| "logits/rejected": -3.107337474822998, | |
| "logps/chosen": -312.9956970214844, | |
| "logps/rejected": -323.22760009765625, | |
| "loss": 0.4472, | |
| "rewards/accuracies": 0.7916667461395264, | |
| "rewards/chosen": -0.5123935341835022, | |
| "rewards/margins": 1.0743831396102905, | |
| "rewards/rejected": -1.5867767333984375, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.4278704612365064, | |
| "grad_norm": 5.212751388549805, | |
| "learning_rate": 1.1459968602825747e-05, | |
| "logits/chosen": -3.0182480812072754, | |
| "logits/rejected": -3.0287539958953857, | |
| "logps/chosen": -332.08709716796875, | |
| "logps/rejected": -337.66424560546875, | |
| "loss": 0.5333, | |
| "rewards/accuracies": 0.7291666865348816, | |
| "rewards/chosen": -0.5322802066802979, | |
| "rewards/margins": 1.01456618309021, | |
| "rewards/rejected": -1.546846628189087, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.43179587831207067, | |
| "grad_norm": 4.944022178649902, | |
| "learning_rate": 1.1381475667189954e-05, | |
| "logits/chosen": -2.9780099391937256, | |
| "logits/rejected": -3.069441080093384, | |
| "logps/chosen": -343.2859191894531, | |
| "logps/rejected": -345.21441650390625, | |
| "loss": 0.5462, | |
| "rewards/accuracies": 0.7041666507720947, | |
| "rewards/chosen": -0.5427877306938171, | |
| "rewards/margins": 1.0927588939666748, | |
| "rewards/rejected": -1.6355466842651367, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.43572129538763493, | |
| "grad_norm": 3.889643669128418, | |
| "learning_rate": 1.1302982731554161e-05, | |
| "logits/chosen": -2.9730026721954346, | |
| "logits/rejected": -3.022367477416992, | |
| "logps/chosen": -324.47406005859375, | |
| "logps/rejected": -326.12353515625, | |
| "loss": 0.4898, | |
| "rewards/accuracies": 0.7708333134651184, | |
| "rewards/chosen": -0.42287206649780273, | |
| "rewards/margins": 1.078303575515747, | |
| "rewards/rejected": -1.5011756420135498, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.4396467124631992, | |
| "grad_norm": 5.789305210113525, | |
| "learning_rate": 1.1224489795918367e-05, | |
| "logits/chosen": -3.0275089740753174, | |
| "logits/rejected": -3.048832654953003, | |
| "logps/chosen": -327.5396423339844, | |
| "logps/rejected": -334.8791198730469, | |
| "loss": 0.4796, | |
| "rewards/accuracies": 0.7625000476837158, | |
| "rewards/chosen": -0.35886624455451965, | |
| "rewards/margins": 1.1369972229003906, | |
| "rewards/rejected": -1.4958635568618774, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.4435721295387635, | |
| "grad_norm": 4.372865676879883, | |
| "learning_rate": 1.1145996860282576e-05, | |
| "logits/chosen": -3.0532193183898926, | |
| "logits/rejected": -3.059150457382202, | |
| "logps/chosen": -312.3909606933594, | |
| "logps/rejected": -333.62982177734375, | |
| "loss": 0.5414, | |
| "rewards/accuracies": 0.7000001072883606, | |
| "rewards/chosen": -0.38431116938591003, | |
| "rewards/margins": 0.9614561796188354, | |
| "rewards/rejected": -1.3457673788070679, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.4474975466143278, | |
| "grad_norm": 4.2039642333984375, | |
| "learning_rate": 1.1067503924646783e-05, | |
| "logits/chosen": -2.9767494201660156, | |
| "logits/rejected": -3.029498338699341, | |
| "logps/chosen": -304.53497314453125, | |
| "logps/rejected": -317.2914733886719, | |
| "loss": 0.4874, | |
| "rewards/accuracies": 0.7541667222976685, | |
| "rewards/chosen": -0.24222330749034882, | |
| "rewards/margins": 1.0617117881774902, | |
| "rewards/rejected": -1.303935170173645, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.45142296368989204, | |
| "grad_norm": 4.793886661529541, | |
| "learning_rate": 1.098901098901099e-05, | |
| "logits/chosen": -3.0597524642944336, | |
| "logits/rejected": -3.086867332458496, | |
| "logps/chosen": -341.6616516113281, | |
| "logps/rejected": -336.95037841796875, | |
| "loss": 0.4886, | |
| "rewards/accuracies": 0.7708333730697632, | |
| "rewards/chosen": -0.08540080487728119, | |
| "rewards/margins": 1.0467342138290405, | |
| "rewards/rejected": -1.132135033607483, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.4553483807654563, | |
| "grad_norm": 14.698615074157715, | |
| "learning_rate": 1.0910518053375196e-05, | |
| "logits/chosen": -3.0199100971221924, | |
| "logits/rejected": -3.032982349395752, | |
| "logps/chosen": -356.33404541015625, | |
| "logps/rejected": -344.212890625, | |
| "loss": 0.459, | |
| "rewards/accuracies": 0.7666667699813843, | |
| "rewards/chosen": 0.0503697507083416, | |
| "rewards/margins": 1.1821739673614502, | |
| "rewards/rejected": -1.1318042278289795, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.4592737978410206, | |
| "grad_norm": 3.817831516265869, | |
| "learning_rate": 1.0832025117739405e-05, | |
| "logits/chosen": -3.019148349761963, | |
| "logits/rejected": -2.993727207183838, | |
| "logps/chosen": -329.00787353515625, | |
| "logps/rejected": -309.19903564453125, | |
| "loss": 0.4704, | |
| "rewards/accuracies": 0.7666667699813843, | |
| "rewards/chosen": 0.019623804837465286, | |
| "rewards/margins": 1.1090670824050903, | |
| "rewards/rejected": -1.0894432067871094, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.4631992149165849, | |
| "grad_norm": 4.845146656036377, | |
| "learning_rate": 1.0753532182103612e-05, | |
| "logits/chosen": -3.0271801948547363, | |
| "logits/rejected": -3.024869441986084, | |
| "logps/chosen": -352.64697265625, | |
| "logps/rejected": -353.69305419921875, | |
| "loss": 0.494, | |
| "rewards/accuracies": 0.7666667103767395, | |
| "rewards/chosen": -0.22464075684547424, | |
| "rewards/margins": 1.1474275588989258, | |
| "rewards/rejected": -1.3720684051513672, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.46712463199214915, | |
| "grad_norm": 4.333688735961914, | |
| "learning_rate": 1.067503924646782e-05, | |
| "logits/chosen": -3.0770018100738525, | |
| "logits/rejected": -3.119150400161743, | |
| "logps/chosen": -354.69439697265625, | |
| "logps/rejected": -327.85791015625, | |
| "loss": 0.456, | |
| "rewards/accuracies": 0.7791666388511658, | |
| "rewards/chosen": -0.06289488822221756, | |
| "rewards/margins": 1.210386037826538, | |
| "rewards/rejected": -1.2732809782028198, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.47105004906771347, | |
| "grad_norm": 4.578314304351807, | |
| "learning_rate": 1.0596546310832025e-05, | |
| "logits/chosen": -3.0018227100372314, | |
| "logits/rejected": -3.0865108966827393, | |
| "logps/chosen": -311.1248779296875, | |
| "logps/rejected": -309.74273681640625, | |
| "loss": 0.4818, | |
| "rewards/accuracies": 0.7750000357627869, | |
| "rewards/chosen": -0.05175580456852913, | |
| "rewards/margins": 1.112265944480896, | |
| "rewards/rejected": -1.1640217304229736, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.47105004906771347, | |
| "eval_logits/chosen": -3.0338661670684814, | |
| "eval_logits/rejected": -3.0616049766540527, | |
| "eval_logps/chosen": -330.5699157714844, | |
| "eval_logps/rejected": -324.86346435546875, | |
| "eval_loss": 0.49363288283348083, | |
| "eval_rewards/accuracies": 0.7524999976158142, | |
| "eval_rewards/chosen": -0.1206398755311966, | |
| "eval_rewards/margins": 1.1228933334350586, | |
| "eval_rewards/rejected": -1.2435332536697388, | |
| "eval_runtime": 171.2479, | |
| "eval_samples_per_second": 11.679, | |
| "eval_steps_per_second": 5.839, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.47497546614327774, | |
| "grad_norm": 5.259768009185791, | |
| "learning_rate": 1.0518053375196234e-05, | |
| "logits/chosen": -3.009653091430664, | |
| "logits/rejected": -3.0764992237091064, | |
| "logps/chosen": -346.80059814453125, | |
| "logps/rejected": -324.3446350097656, | |
| "loss": 0.5004, | |
| "rewards/accuracies": 0.7375000715255737, | |
| "rewards/chosen": -0.03653601557016373, | |
| "rewards/margins": 1.1117885112762451, | |
| "rewards/rejected": -1.1483246088027954, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.478900883218842, | |
| "grad_norm": 6.143235206604004, | |
| "learning_rate": 1.0439560439560441e-05, | |
| "logits/chosen": -2.989396810531616, | |
| "logits/rejected": -3.0582377910614014, | |
| "logps/chosen": -337.9186096191406, | |
| "logps/rejected": -332.8787841796875, | |
| "loss": 0.575, | |
| "rewards/accuracies": 0.6875000596046448, | |
| "rewards/chosen": -0.15022191405296326, | |
| "rewards/margins": 1.013187050819397, | |
| "rewards/rejected": -1.163408875465393, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.48282630029440626, | |
| "grad_norm": 5.3697590827941895, | |
| "learning_rate": 1.0361067503924647e-05, | |
| "logits/chosen": -3.028064727783203, | |
| "logits/rejected": -3.033423662185669, | |
| "logps/chosen": -343.3203430175781, | |
| "logps/rejected": -327.01202392578125, | |
| "loss": 0.4706, | |
| "rewards/accuracies": 0.7791666984558105, | |
| "rewards/chosen": 0.030805181711912155, | |
| "rewards/margins": 1.2250150442123413, | |
| "rewards/rejected": -1.1942098140716553, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.4867517173699706, | |
| "grad_norm": 3.8416597843170166, | |
| "learning_rate": 1.0282574568288854e-05, | |
| "logits/chosen": -2.909081220626831, | |
| "logits/rejected": -2.975391387939453, | |
| "logps/chosen": -323.62725830078125, | |
| "logps/rejected": -332.06524658203125, | |
| "loss": 0.4792, | |
| "rewards/accuracies": 0.7291666865348816, | |
| "rewards/chosen": -0.17823012173175812, | |
| "rewards/margins": 1.1644293069839478, | |
| "rewards/rejected": -1.3426594734191895, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.49067713444553485, | |
| "grad_norm": 5.597027778625488, | |
| "learning_rate": 1.0204081632653063e-05, | |
| "logits/chosen": -3.052537441253662, | |
| "logits/rejected": -3.0111711025238037, | |
| "logps/chosen": -319.8970642089844, | |
| "logps/rejected": -322.13671875, | |
| "loss": 0.5098, | |
| "rewards/accuracies": 0.7541667222976685, | |
| "rewards/chosen": -0.31647107005119324, | |
| "rewards/margins": 1.0438673496246338, | |
| "rewards/rejected": -1.3603384494781494, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.4946025515210991, | |
| "grad_norm": 5.787623405456543, | |
| "learning_rate": 1.012558869701727e-05, | |
| "logits/chosen": -2.932849407196045, | |
| "logits/rejected": -3.022317886352539, | |
| "logps/chosen": -350.6145324707031, | |
| "logps/rejected": -343.1180114746094, | |
| "loss": 0.5072, | |
| "rewards/accuracies": 0.7375000715255737, | |
| "rewards/chosen": -0.10851552337408066, | |
| "rewards/margins": 1.0356271266937256, | |
| "rewards/rejected": -1.144142746925354, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.4985279685966634, | |
| "grad_norm": 6.2099385261535645, | |
| "learning_rate": 1.0047095761381476e-05, | |
| "logits/chosen": -2.9498133659362793, | |
| "logits/rejected": -2.9196791648864746, | |
| "logps/chosen": -315.73150634765625, | |
| "logps/rejected": -315.6263427734375, | |
| "loss": 0.5515, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.16974535584449768, | |
| "rewards/margins": 0.9117358326911926, | |
| "rewards/rejected": -1.0814812183380127, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.5024533856722276, | |
| "grad_norm": 4.4105448722839355, | |
| "learning_rate": 9.968602825745683e-06, | |
| "logits/chosen": -2.970156192779541, | |
| "logits/rejected": -2.9927220344543457, | |
| "logps/chosen": -296.1843566894531, | |
| "logps/rejected": -292.82244873046875, | |
| "loss": 0.5219, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.09649379551410675, | |
| "rewards/margins": 0.9780260324478149, | |
| "rewards/rejected": -1.0745197534561157, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.5063788027477919, | |
| "grad_norm": 4.673651218414307, | |
| "learning_rate": 9.890109890109892e-06, | |
| "logits/chosen": -3.0400826930999756, | |
| "logits/rejected": -3.0831387042999268, | |
| "logps/chosen": -339.0031433105469, | |
| "logps/rejected": -329.25152587890625, | |
| "loss": 0.5512, | |
| "rewards/accuracies": 0.7041667699813843, | |
| "rewards/chosen": -0.14476463198661804, | |
| "rewards/margins": 0.8560221791267395, | |
| "rewards/rejected": -1.0007867813110352, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.5103042198233563, | |
| "grad_norm": 4.254279613494873, | |
| "learning_rate": 9.811616954474098e-06, | |
| "logits/chosen": -3.0292727947235107, | |
| "logits/rejected": -3.0746917724609375, | |
| "logps/chosen": -313.74591064453125, | |
| "logps/rejected": -302.05279541015625, | |
| "loss": 0.5132, | |
| "rewards/accuracies": 0.7166666388511658, | |
| "rewards/chosen": -0.06440563499927521, | |
| "rewards/margins": 1.017377257347107, | |
| "rewards/rejected": -1.0817829370498657, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.5142296368989205, | |
| "grad_norm": 4.169983863830566, | |
| "learning_rate": 9.733124018838307e-06, | |
| "logits/chosen": -2.944243907928467, | |
| "logits/rejected": -3.0422754287719727, | |
| "logps/chosen": -288.42559814453125, | |
| "logps/rejected": -296.96026611328125, | |
| "loss": 0.4622, | |
| "rewards/accuracies": 0.7958333492279053, | |
| "rewards/chosen": -0.060607265681028366, | |
| "rewards/margins": 1.0034582614898682, | |
| "rewards/rejected": -1.0640655755996704, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.5181550539744848, | |
| "grad_norm": 4.819730281829834, | |
| "learning_rate": 9.654631083202512e-06, | |
| "logits/chosen": -2.9609310626983643, | |
| "logits/rejected": -3.0102219581604004, | |
| "logps/chosen": -329.1315002441406, | |
| "logps/rejected": -305.99737548828125, | |
| "loss": 0.5414, | |
| "rewards/accuracies": 0.6958333849906921, | |
| "rewards/chosen": -0.272332102060318, | |
| "rewards/margins": 0.9030130505561829, | |
| "rewards/rejected": -1.1753450632095337, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5220804710500491, | |
| "grad_norm": 4.765251159667969, | |
| "learning_rate": 9.576138147566721e-06, | |
| "logits/chosen": -2.911606550216675, | |
| "logits/rejected": -3.0551559925079346, | |
| "logps/chosen": -372.60357666015625, | |
| "logps/rejected": -350.260498046875, | |
| "loss": 0.5388, | |
| "rewards/accuracies": 0.7458333969116211, | |
| "rewards/chosen": -0.2132304608821869, | |
| "rewards/margins": 0.8807324171066284, | |
| "rewards/rejected": -1.0939629077911377, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.5260058881256133, | |
| "grad_norm": 4.679354667663574, | |
| "learning_rate": 9.497645211930927e-06, | |
| "logits/chosen": -2.996860980987549, | |
| "logits/rejected": -2.969712734222412, | |
| "logps/chosen": -329.93768310546875, | |
| "logps/rejected": -349.2161560058594, | |
| "loss": 0.5549, | |
| "rewards/accuracies": 0.7291666865348816, | |
| "rewards/chosen": -0.19874221086502075, | |
| "rewards/margins": 0.9041234254837036, | |
| "rewards/rejected": -1.1028656959533691, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5299313052011776, | |
| "grad_norm": 5.291715621948242, | |
| "learning_rate": 9.419152276295134e-06, | |
| "logits/chosen": -3.0396275520324707, | |
| "logits/rejected": -3.0426812171936035, | |
| "logps/chosen": -360.34552001953125, | |
| "logps/rejected": -345.2974853515625, | |
| "loss": 0.561, | |
| "rewards/accuracies": 0.6916667222976685, | |
| "rewards/chosen": -0.31488001346588135, | |
| "rewards/margins": 0.8902130126953125, | |
| "rewards/rejected": -1.2050931453704834, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.5338567222767419, | |
| "grad_norm": 4.548229694366455, | |
| "learning_rate": 9.340659340659341e-06, | |
| "logits/chosen": -2.9637022018432617, | |
| "logits/rejected": -2.995466947555542, | |
| "logps/chosen": -349.22283935546875, | |
| "logps/rejected": -336.8186340332031, | |
| "loss": 0.5911, | |
| "rewards/accuracies": 0.67083340883255, | |
| "rewards/chosen": -0.22216463088989258, | |
| "rewards/margins": 0.75648432970047, | |
| "rewards/rejected": -0.978648841381073, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.5377821393523062, | |
| "grad_norm": 3.729768753051758, | |
| "learning_rate": 9.262166405023548e-06, | |
| "logits/chosen": -2.9287047386169434, | |
| "logits/rejected": -2.999833106994629, | |
| "logps/chosen": -338.10162353515625, | |
| "logps/rejected": -330.71282958984375, | |
| "loss": 0.4436, | |
| "rewards/accuracies": 0.7541667222976685, | |
| "rewards/chosen": 0.11126607656478882, | |
| "rewards/margins": 1.1149643659591675, | |
| "rewards/rejected": -1.003698468208313, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.5417075564278705, | |
| "grad_norm": 3.5768096446990967, | |
| "learning_rate": 9.183673469387756e-06, | |
| "logits/chosen": -3.034193754196167, | |
| "logits/rejected": -3.030886650085449, | |
| "logps/chosen": -336.0780944824219, | |
| "logps/rejected": -328.44195556640625, | |
| "loss": 0.4591, | |
| "rewards/accuracies": 0.7708333730697632, | |
| "rewards/chosen": 0.15392692387104034, | |
| "rewards/margins": 1.142258882522583, | |
| "rewards/rejected": -0.9883320927619934, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.5456329735034348, | |
| "grad_norm": 4.376099109649658, | |
| "learning_rate": 9.105180533751963e-06, | |
| "logits/chosen": -2.9962871074676514, | |
| "logits/rejected": -3.0676894187927246, | |
| "logps/chosen": -312.1530456542969, | |
| "logps/rejected": -304.1636657714844, | |
| "loss": 0.4686, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.018533676862716675, | |
| "rewards/margins": 1.1388862133026123, | |
| "rewards/rejected": -1.1574198007583618, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.549558390578999, | |
| "grad_norm": 5.068153381347656, | |
| "learning_rate": 9.02668759811617e-06, | |
| "logits/chosen": -3.0012753009796143, | |
| "logits/rejected": -2.9406380653381348, | |
| "logps/chosen": -334.9573059082031, | |
| "logps/rejected": -336.917724609375, | |
| "loss": 0.5127, | |
| "rewards/accuracies": 0.7333333492279053, | |
| "rewards/chosen": -0.1398693174123764, | |
| "rewards/margins": 1.0154725313186646, | |
| "rewards/rejected": -1.1553419828414917, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.549558390578999, | |
| "eval_logits/chosen": -3.029242753982544, | |
| "eval_logits/rejected": -3.0569090843200684, | |
| "eval_logps/chosen": -332.35626220703125, | |
| "eval_logps/rejected": -325.6695861816406, | |
| "eval_loss": 0.49267128109931946, | |
| "eval_rewards/accuracies": 0.75, | |
| "eval_rewards/chosen": -0.29927709698677063, | |
| "eval_rewards/margins": 1.0248706340789795, | |
| "eval_rewards/rejected": -1.3241477012634277, | |
| "eval_runtime": 170.0257, | |
| "eval_samples_per_second": 11.763, | |
| "eval_steps_per_second": 5.881, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5534838076545633, | |
| "grad_norm": 5.191127777099609, | |
| "learning_rate": 8.948194662480377e-06, | |
| "logits/chosen": -2.9839444160461426, | |
| "logits/rejected": -3.006364345550537, | |
| "logps/chosen": -337.6143798828125, | |
| "logps/rejected": -352.8746643066406, | |
| "loss": 0.5361, | |
| "rewards/accuracies": 0.7125000357627869, | |
| "rewards/chosen": -0.30696621537208557, | |
| "rewards/margins": 1.0262271165847778, | |
| "rewards/rejected": -1.3331931829452515, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.5574092247301276, | |
| "grad_norm": 3.932312250137329, | |
| "learning_rate": 8.869701726844585e-06, | |
| "logits/chosen": -2.9533979892730713, | |
| "logits/rejected": -3.0565037727355957, | |
| "logps/chosen": -316.4660949707031, | |
| "logps/rejected": -302.0240783691406, | |
| "loss": 0.4628, | |
| "rewards/accuracies": 0.7708333730697632, | |
| "rewards/chosen": -0.23080229759216309, | |
| "rewards/margins": 1.1649229526519775, | |
| "rewards/rejected": -1.3957254886627197, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.5613346418056918, | |
| "grad_norm": 6.213256359100342, | |
| "learning_rate": 8.791208791208792e-06, | |
| "logits/chosen": -2.9602789878845215, | |
| "logits/rejected": -2.960151195526123, | |
| "logps/chosen": -283.5656433105469, | |
| "logps/rejected": -263.10260009765625, | |
| "loss": 0.4912, | |
| "rewards/accuracies": 0.7458332777023315, | |
| "rewards/chosen": -0.20947471261024475, | |
| "rewards/margins": 1.0615582466125488, | |
| "rewards/rejected": -1.2710330486297607, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.5652600588812562, | |
| "grad_norm": 3.982071876525879, | |
| "learning_rate": 8.712715855573e-06, | |
| "logits/chosen": -2.966439723968506, | |
| "logits/rejected": -3.0421411991119385, | |
| "logps/chosen": -328.9455261230469, | |
| "logps/rejected": -314.2897644042969, | |
| "loss": 0.4648, | |
| "rewards/accuracies": 0.7541667222976685, | |
| "rewards/chosen": -0.02404719963669777, | |
| "rewards/margins": 1.0856393575668335, | |
| "rewards/rejected": -1.1096864938735962, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.5691854759568205, | |
| "grad_norm": 5.195390701293945, | |
| "learning_rate": 8.634222919937206e-06, | |
| "logits/chosen": -3.0160458087921143, | |
| "logits/rejected": -3.0692107677459717, | |
| "logps/chosen": -332.7279357910156, | |
| "logps/rejected": -336.43927001953125, | |
| "loss": 0.5002, | |
| "rewards/accuracies": 0.7291666269302368, | |
| "rewards/chosen": -0.19675110280513763, | |
| "rewards/margins": 0.985478401184082, | |
| "rewards/rejected": -1.1822296380996704, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.5731108930323847, | |
| "grad_norm": 4.930713653564453, | |
| "learning_rate": 8.555729984301414e-06, | |
| "logits/chosen": -3.0057716369628906, | |
| "logits/rejected": -3.0269782543182373, | |
| "logps/chosen": -306.15875244140625, | |
| "logps/rejected": -305.45416259765625, | |
| "loss": 0.4791, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.07756291329860687, | |
| "rewards/margins": 1.0487645864486694, | |
| "rewards/rejected": -1.1263275146484375, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.577036310107949, | |
| "grad_norm": 3.645521640777588, | |
| "learning_rate": 8.477237048665621e-06, | |
| "logits/chosen": -3.024445056915283, | |
| "logits/rejected": -3.0869052410125732, | |
| "logps/chosen": -303.17327880859375, | |
| "logps/rejected": -301.08905029296875, | |
| "loss": 0.4716, | |
| "rewards/accuracies": 0.7666667699813843, | |
| "rewards/chosen": -0.038672782480716705, | |
| "rewards/margins": 1.1686241626739502, | |
| "rewards/rejected": -1.2072969675064087, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.5809617271835132, | |
| "grad_norm": 4.946695327758789, | |
| "learning_rate": 8.398744113029828e-06, | |
| "logits/chosen": -3.0039095878601074, | |
| "logits/rejected": -3.015929698944092, | |
| "logps/chosen": -322.4432373046875, | |
| "logps/rejected": -309.04107666015625, | |
| "loss": 0.5134, | |
| "rewards/accuracies": 0.7458333373069763, | |
| "rewards/chosen": -0.12716850638389587, | |
| "rewards/margins": 0.9853676557540894, | |
| "rewards/rejected": -1.1125361919403076, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.5848871442590775, | |
| "grad_norm": 5.070699214935303, | |
| "learning_rate": 8.320251177394036e-06, | |
| "logits/chosen": -3.0126490592956543, | |
| "logits/rejected": -3.098520278930664, | |
| "logps/chosen": -357.4333190917969, | |
| "logps/rejected": -332.8435363769531, | |
| "loss": 0.5058, | |
| "rewards/accuracies": 0.7625000476837158, | |
| "rewards/chosen": -0.04318712279200554, | |
| "rewards/margins": 1.034911036491394, | |
| "rewards/rejected": -1.078098177909851, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.5888125613346418, | |
| "grad_norm": 3.6236932277679443, | |
| "learning_rate": 8.241758241758243e-06, | |
| "logits/chosen": -2.9999072551727295, | |
| "logits/rejected": -2.990403413772583, | |
| "logps/chosen": -357.82733154296875, | |
| "logps/rejected": -326.899169921875, | |
| "loss": 0.4588, | |
| "rewards/accuracies": 0.7708333730697632, | |
| "rewards/chosen": -0.11650122702121735, | |
| "rewards/margins": 1.1574772596359253, | |
| "rewards/rejected": -1.2739784717559814, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.592737978410206, | |
| "grad_norm": 4.285853862762451, | |
| "learning_rate": 8.16326530612245e-06, | |
| "logits/chosen": -3.0003159046173096, | |
| "logits/rejected": -3.048569679260254, | |
| "logps/chosen": -349.8592834472656, | |
| "logps/rejected": -304.07427978515625, | |
| "loss": 0.4784, | |
| "rewards/accuracies": 0.7750000953674316, | |
| "rewards/chosen": -0.07542826980352402, | |
| "rewards/margins": 1.2488701343536377, | |
| "rewards/rejected": -1.32429838180542, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.5966633954857704, | |
| "grad_norm": 4.365904331207275, | |
| "learning_rate": 8.084772370486657e-06, | |
| "logits/chosen": -3.0357697010040283, | |
| "logits/rejected": -3.12182879447937, | |
| "logps/chosen": -317.8271484375, | |
| "logps/rejected": -298.142578125, | |
| "loss": 0.4855, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": -0.06173648685216904, | |
| "rewards/margins": 1.079683542251587, | |
| "rewards/rejected": -1.1414198875427246, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.6005888125613347, | |
| "grad_norm": 4.72523307800293, | |
| "learning_rate": 8.006279434850865e-06, | |
| "logits/chosen": -3.0452723503112793, | |
| "logits/rejected": -3.0594534873962402, | |
| "logps/chosen": -317.8534240722656, | |
| "logps/rejected": -329.19976806640625, | |
| "loss": 0.5079, | |
| "rewards/accuracies": 0.7583333849906921, | |
| "rewards/chosen": -0.3027920126914978, | |
| "rewards/margins": 1.1086828708648682, | |
| "rewards/rejected": -1.4114749431610107, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.6045142296368989, | |
| "grad_norm": 4.864989757537842, | |
| "learning_rate": 7.927786499215072e-06, | |
| "logits/chosen": -3.0068793296813965, | |
| "logits/rejected": -3.017885446548462, | |
| "logps/chosen": -314.66876220703125, | |
| "logps/rejected": -317.1419677734375, | |
| "loss": 0.4965, | |
| "rewards/accuracies": 0.7583333253860474, | |
| "rewards/chosen": -0.18796098232269287, | |
| "rewards/margins": 1.0385706424713135, | |
| "rewards/rejected": -1.2265316247940063, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.6084396467124632, | |
| "grad_norm": 4.275363922119141, | |
| "learning_rate": 7.849293563579279e-06, | |
| "logits/chosen": -3.002530574798584, | |
| "logits/rejected": -2.980320453643799, | |
| "logps/chosen": -317.7269287109375, | |
| "logps/rejected": -339.15997314453125, | |
| "loss": 0.4468, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.04108821228146553, | |
| "rewards/margins": 1.3123092651367188, | |
| "rewards/rejected": -1.3533976078033447, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.6123650637880275, | |
| "grad_norm": 4.48534631729126, | |
| "learning_rate": 7.770800627943486e-06, | |
| "logits/chosen": -2.961935520172119, | |
| "logits/rejected": -3.0330467224121094, | |
| "logps/chosen": -370.4855041503906, | |
| "logps/rejected": -328.0148010253906, | |
| "loss": 0.5256, | |
| "rewards/accuracies": 0.7791666984558105, | |
| "rewards/chosen": 0.09827003628015518, | |
| "rewards/margins": 1.1447842121124268, | |
| "rewards/rejected": -1.0465141534805298, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.6162904808635917, | |
| "grad_norm": 4.550291061401367, | |
| "learning_rate": 7.692307692307694e-06, | |
| "logits/chosen": -3.0217397212982178, | |
| "logits/rejected": -3.0674948692321777, | |
| "logps/chosen": -317.39971923828125, | |
| "logps/rejected": -297.3795471191406, | |
| "loss": 0.5762, | |
| "rewards/accuracies": 0.7166666984558105, | |
| "rewards/chosen": -0.1348334103822708, | |
| "rewards/margins": 0.9545001983642578, | |
| "rewards/rejected": -1.0893336534500122, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.620215897939156, | |
| "grad_norm": 3.8520846366882324, | |
| "learning_rate": 7.6138147566719e-06, | |
| "logits/chosen": -3.0688512325286865, | |
| "logits/rejected": -3.068896770477295, | |
| "logps/chosen": -332.6806640625, | |
| "logps/rejected": -327.0592956542969, | |
| "loss": 0.4392, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.005734431557357311, | |
| "rewards/margins": 1.1970218420028687, | |
| "rewards/rejected": -1.2027562856674194, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.6241413150147204, | |
| "grad_norm": 5.522543907165527, | |
| "learning_rate": 7.535321821036108e-06, | |
| "logits/chosen": -3.0281357765197754, | |
| "logits/rejected": -3.069322109222412, | |
| "logps/chosen": -318.053466796875, | |
| "logps/rejected": -313.0615234375, | |
| "loss": 0.5247, | |
| "rewards/accuracies": 0.7083333730697632, | |
| "rewards/chosen": -0.1159566193819046, | |
| "rewards/margins": 0.977625846862793, | |
| "rewards/rejected": -1.0935826301574707, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.6280667320902846, | |
| "grad_norm": 5.020391941070557, | |
| "learning_rate": 7.4568288854003145e-06, | |
| "logits/chosen": -2.9681613445281982, | |
| "logits/rejected": -2.948774814605713, | |
| "logps/chosen": -342.89019775390625, | |
| "logps/rejected": -317.9969482421875, | |
| "loss": 0.4847, | |
| "rewards/accuracies": 0.7583333253860474, | |
| "rewards/chosen": -0.22335605323314667, | |
| "rewards/margins": 1.1023824214935303, | |
| "rewards/rejected": -1.3257384300231934, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6280667320902846, | |
| "eval_logits/chosen": -3.0195696353912354, | |
| "eval_logits/rejected": -3.047680139541626, | |
| "eval_logps/chosen": -330.66943359375, | |
| "eval_logps/rejected": -324.49725341796875, | |
| "eval_loss": 0.489461213350296, | |
| "eval_rewards/accuracies": 0.7534999847412109, | |
| "eval_rewards/chosen": -0.13059695065021515, | |
| "eval_rewards/margins": 1.0763192176818848, | |
| "eval_rewards/rejected": -1.2069162130355835, | |
| "eval_runtime": 170.5252, | |
| "eval_samples_per_second": 11.728, | |
| "eval_steps_per_second": 5.864, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6319921491658489, | |
| "grad_norm": 3.8706929683685303, | |
| "learning_rate": 7.378335949764521e-06, | |
| "logits/chosen": -3.020859479904175, | |
| "logits/rejected": -3.0139949321746826, | |
| "logps/chosen": -333.51654052734375, | |
| "logps/rejected": -324.34991455078125, | |
| "loss": 0.5385, | |
| "rewards/accuracies": 0.720833420753479, | |
| "rewards/chosen": -0.24091720581054688, | |
| "rewards/margins": 0.9881976246833801, | |
| "rewards/rejected": -1.2291150093078613, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.6359175662414132, | |
| "grad_norm": 3.939385175704956, | |
| "learning_rate": 7.299843014128729e-06, | |
| "logits/chosen": -2.999584674835205, | |
| "logits/rejected": -2.953639507293701, | |
| "logps/chosen": -311.6091003417969, | |
| "logps/rejected": -367.5775451660156, | |
| "loss": 0.4001, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.0669659823179245, | |
| "rewards/margins": 1.300567388534546, | |
| "rewards/rejected": -1.3675333261489868, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.6398429833169774, | |
| "grad_norm": 3.8747594356536865, | |
| "learning_rate": 7.2213500784929355e-06, | |
| "logits/chosen": -2.9922289848327637, | |
| "logits/rejected": -3.0901122093200684, | |
| "logps/chosen": -351.9647216796875, | |
| "logps/rejected": -324.533935546875, | |
| "loss": 0.4751, | |
| "rewards/accuracies": 0.7666667103767395, | |
| "rewards/chosen": -0.19494260847568512, | |
| "rewards/margins": 1.1178690195083618, | |
| "rewards/rejected": -1.3128114938735962, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.6437684003925417, | |
| "grad_norm": 4.751869201660156, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "logits/chosen": -3.0304911136627197, | |
| "logits/rejected": -3.0640957355499268, | |
| "logps/chosen": -350.48040771484375, | |
| "logps/rejected": -304.4465026855469, | |
| "loss": 0.4689, | |
| "rewards/accuracies": 0.7708333730697632, | |
| "rewards/chosen": -0.17158253490924835, | |
| "rewards/margins": 1.1375932693481445, | |
| "rewards/rejected": -1.309175968170166, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.647693817468106, | |
| "grad_norm": 3.528496503829956, | |
| "learning_rate": 7.06436420722135e-06, | |
| "logits/chosen": -2.954141139984131, | |
| "logits/rejected": -3.0600366592407227, | |
| "logps/chosen": -344.02496337890625, | |
| "logps/rejected": -313.1765441894531, | |
| "loss": 0.4165, | |
| "rewards/accuracies": 0.7958333492279053, | |
| "rewards/chosen": 0.023122036829590797, | |
| "rewards/margins": 1.3192641735076904, | |
| "rewards/rejected": -1.2961422204971313, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.6516192345436702, | |
| "grad_norm": 4.554298400878906, | |
| "learning_rate": 6.985871271585558e-06, | |
| "logits/chosen": -2.985323190689087, | |
| "logits/rejected": -3.0694005489349365, | |
| "logps/chosen": -347.07855224609375, | |
| "logps/rejected": -321.8019714355469, | |
| "loss": 0.4487, | |
| "rewards/accuracies": 0.7833333015441895, | |
| "rewards/chosen": 0.15593689680099487, | |
| "rewards/margins": 1.35723078250885, | |
| "rewards/rejected": -1.2012939453125, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.6555446516192346, | |
| "grad_norm": 4.962828159332275, | |
| "learning_rate": 6.9073783359497645e-06, | |
| "logits/chosen": -2.939275026321411, | |
| "logits/rejected": -3.06274151802063, | |
| "logps/chosen": -318.2911376953125, | |
| "logps/rejected": -321.3827209472656, | |
| "loss": 0.453, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.16927500069141388, | |
| "rewards/margins": 1.2757118940353394, | |
| "rewards/rejected": -1.106436848640442, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.6594700686947988, | |
| "grad_norm": 3.8393807411193848, | |
| "learning_rate": 6.828885400313973e-06, | |
| "logits/chosen": -2.948727607727051, | |
| "logits/rejected": -2.9849319458007812, | |
| "logps/chosen": -278.67950439453125, | |
| "logps/rejected": -289.4703063964844, | |
| "loss": 0.5496, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.003879111958667636, | |
| "rewards/margins": 0.9358898997306824, | |
| "rewards/rejected": -0.9397690892219543, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.6633954857703631, | |
| "grad_norm": 4.3185577392578125, | |
| "learning_rate": 6.750392464678179e-06, | |
| "logits/chosen": -2.9654245376586914, | |
| "logits/rejected": -3.0434441566467285, | |
| "logps/chosen": -327.6491394042969, | |
| "logps/rejected": -321.876953125, | |
| "loss": 0.4393, | |
| "rewards/accuracies": 0.8083333969116211, | |
| "rewards/chosen": -0.07706048339605331, | |
| "rewards/margins": 1.2577455043792725, | |
| "rewards/rejected": -1.3348058462142944, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.6673209028459274, | |
| "grad_norm": 4.293339252471924, | |
| "learning_rate": 6.671899529042387e-06, | |
| "logits/chosen": -3.1013782024383545, | |
| "logits/rejected": -3.0577914714813232, | |
| "logps/chosen": -321.20611572265625, | |
| "logps/rejected": -330.765380859375, | |
| "loss": 0.4861, | |
| "rewards/accuracies": 0.7708333730697632, | |
| "rewards/chosen": -0.0720919817686081, | |
| "rewards/margins": 1.2457802295684814, | |
| "rewards/rejected": -1.3178722858428955, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.6712463199214916, | |
| "grad_norm": 4.7395734786987305, | |
| "learning_rate": 6.5934065934065935e-06, | |
| "logits/chosen": -2.967613697052002, | |
| "logits/rejected": -3.026918411254883, | |
| "logps/chosen": -309.9462890625, | |
| "logps/rejected": -322.9964904785156, | |
| "loss": 0.5118, | |
| "rewards/accuracies": 0.7416667342185974, | |
| "rewards/chosen": -0.34576496481895447, | |
| "rewards/margins": 1.138620138168335, | |
| "rewards/rejected": -1.4843851327896118, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.6751717369970559, | |
| "grad_norm": 4.396761417388916, | |
| "learning_rate": 6.514913657770802e-06, | |
| "logits/chosen": -2.872307300567627, | |
| "logits/rejected": -2.9442195892333984, | |
| "logps/chosen": -344.57275390625, | |
| "logps/rejected": -361.8569030761719, | |
| "loss": 0.4233, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.27561578154563904, | |
| "rewards/margins": 1.3650743961334229, | |
| "rewards/rejected": -1.6406902074813843, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.6790971540726202, | |
| "grad_norm": 5.042901039123535, | |
| "learning_rate": 6.436420722135008e-06, | |
| "logits/chosen": -2.9460458755493164, | |
| "logits/rejected": -2.9742355346679688, | |
| "logps/chosen": -318.40972900390625, | |
| "logps/rejected": -338.845703125, | |
| "loss": 0.5214, | |
| "rewards/accuracies": 0.73333340883255, | |
| "rewards/chosen": -0.464036762714386, | |
| "rewards/margins": 1.0700992345809937, | |
| "rewards/rejected": -1.5341359376907349, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.6830225711481845, | |
| "grad_norm": 5.222243785858154, | |
| "learning_rate": 6.357927786499215e-06, | |
| "logits/chosen": -2.948620080947876, | |
| "logits/rejected": -3.0497138500213623, | |
| "logps/chosen": -319.1834716796875, | |
| "logps/rejected": -319.8209228515625, | |
| "loss": 0.4982, | |
| "rewards/accuracies": 0.7708333730697632, | |
| "rewards/chosen": -0.6082950830459595, | |
| "rewards/margins": 1.108607530593872, | |
| "rewards/rejected": -1.7169023752212524, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.6869479882237488, | |
| "grad_norm": 4.216038703918457, | |
| "learning_rate": 6.279434850863423e-06, | |
| "logits/chosen": -2.9881601333618164, | |
| "logits/rejected": -2.9443399906158447, | |
| "logps/chosen": -347.36181640625, | |
| "logps/rejected": -356.3033447265625, | |
| "loss": 0.4577, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.43880724906921387, | |
| "rewards/margins": 1.2593494653701782, | |
| "rewards/rejected": -1.698156714439392, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.6908734052993131, | |
| "grad_norm": 4.496317386627197, | |
| "learning_rate": 6.20094191522763e-06, | |
| "logits/chosen": -2.937516450881958, | |
| "logits/rejected": -2.9816195964813232, | |
| "logps/chosen": -338.54888916015625, | |
| "logps/rejected": -316.7308654785156, | |
| "loss": 0.4974, | |
| "rewards/accuracies": 0.7583333253860474, | |
| "rewards/chosen": -0.38299983739852905, | |
| "rewards/margins": 1.1357471942901611, | |
| "rewards/rejected": -1.518747091293335, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.6947988223748773, | |
| "grad_norm": 5.088541507720947, | |
| "learning_rate": 6.122448979591837e-06, | |
| "logits/chosen": -2.9346837997436523, | |
| "logits/rejected": -3.0072033405303955, | |
| "logps/chosen": -323.017578125, | |
| "logps/rejected": -321.0302429199219, | |
| "loss": 0.5165, | |
| "rewards/accuracies": 0.7333334684371948, | |
| "rewards/chosen": -0.16942360997200012, | |
| "rewards/margins": 1.0610682964324951, | |
| "rewards/rejected": -1.2304918766021729, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.6987242394504416, | |
| "grad_norm": 5.229645729064941, | |
| "learning_rate": 6.043956043956044e-06, | |
| "logits/chosen": -3.0896613597869873, | |
| "logits/rejected": -3.1020538806915283, | |
| "logps/chosen": -340.7657775878906, | |
| "logps/rejected": -308.33831787109375, | |
| "loss": 0.4848, | |
| "rewards/accuracies": 0.7666666507720947, | |
| "rewards/chosen": -0.14006583392620087, | |
| "rewards/margins": 1.063689947128296, | |
| "rewards/rejected": -1.2037558555603027, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.7026496565260059, | |
| "grad_norm": 4.50402307510376, | |
| "learning_rate": 5.965463108320252e-06, | |
| "logits/chosen": -2.930692195892334, | |
| "logits/rejected": -3.0338551998138428, | |
| "logps/chosen": -322.0299072265625, | |
| "logps/rejected": -337.92059326171875, | |
| "loss": 0.4844, | |
| "rewards/accuracies": 0.7333333492279053, | |
| "rewards/chosen": -0.06006438657641411, | |
| "rewards/margins": 1.1535618305206299, | |
| "rewards/rejected": -1.2136261463165283, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.7065750736015701, | |
| "grad_norm": 3.8728554248809814, | |
| "learning_rate": 5.886970172684459e-06, | |
| "logits/chosen": -2.997563123703003, | |
| "logits/rejected": -3.0621156692504883, | |
| "logps/chosen": -320.8427429199219, | |
| "logps/rejected": -323.98663330078125, | |
| "loss": 0.5245, | |
| "rewards/accuracies": 0.7291667461395264, | |
| "rewards/chosen": -0.022658739238977432, | |
| "rewards/margins": 1.0391814708709717, | |
| "rewards/rejected": -1.061840295791626, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7065750736015701, | |
| "eval_logits/chosen": -3.025604724884033, | |
| "eval_logits/rejected": -3.0542104244232178, | |
| "eval_logps/chosen": -331.0598449707031, | |
| "eval_logps/rejected": -325.0303955078125, | |
| "eval_loss": 0.4869418144226074, | |
| "eval_rewards/accuracies": 0.7524999976158142, | |
| "eval_rewards/chosen": -0.16963602602481842, | |
| "eval_rewards/margins": 1.0905920267105103, | |
| "eval_rewards/rejected": -1.2602282762527466, | |
| "eval_runtime": 171.3633, | |
| "eval_samples_per_second": 11.671, | |
| "eval_steps_per_second": 5.836, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7105004906771345, | |
| "grad_norm": 4.844978332519531, | |
| "learning_rate": 5.808477237048666e-06, | |
| "logits/chosen": -3.054149627685547, | |
| "logits/rejected": -3.120988368988037, | |
| "logps/chosen": -354.59063720703125, | |
| "logps/rejected": -327.55731201171875, | |
| "loss": 0.5112, | |
| "rewards/accuracies": 0.7666667103767395, | |
| "rewards/chosen": -0.17106744647026062, | |
| "rewards/margins": 1.1445057392120361, | |
| "rewards/rejected": -1.3155733346939087, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.7144259077526988, | |
| "grad_norm": 3.322969675064087, | |
| "learning_rate": 5.729984301412873e-06, | |
| "logits/chosen": -2.979074716567993, | |
| "logits/rejected": -3.056349277496338, | |
| "logps/chosen": -341.1482238769531, | |
| "logps/rejected": -330.65411376953125, | |
| "loss": 0.4561, | |
| "rewards/accuracies": 0.7666667103767395, | |
| "rewards/chosen": -0.04363600164651871, | |
| "rewards/margins": 1.232508659362793, | |
| "rewards/rejected": -1.2761447429656982, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.718351324828263, | |
| "grad_norm": 5.024198532104492, | |
| "learning_rate": 5.651491365777081e-06, | |
| "logits/chosen": -3.0509443283081055, | |
| "logits/rejected": -2.996936321258545, | |
| "logps/chosen": -318.218017578125, | |
| "logps/rejected": -317.7236022949219, | |
| "loss": 0.4975, | |
| "rewards/accuracies": 0.7541667222976685, | |
| "rewards/chosen": -0.0626036748290062, | |
| "rewards/margins": 1.1322224140167236, | |
| "rewards/rejected": -1.1948261260986328, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.7222767419038273, | |
| "grad_norm": 4.653200149536133, | |
| "learning_rate": 5.572998430141288e-06, | |
| "logits/chosen": -3.0015101432800293, | |
| "logits/rejected": -3.004070520401001, | |
| "logps/chosen": -353.28912353515625, | |
| "logps/rejected": -330.3291320800781, | |
| "loss": 0.4619, | |
| "rewards/accuracies": 0.7541667222976685, | |
| "rewards/chosen": -0.03766501322388649, | |
| "rewards/margins": 1.2282439470291138, | |
| "rewards/rejected": -1.2659088373184204, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.7262021589793916, | |
| "grad_norm": 4.00246524810791, | |
| "learning_rate": 5.494505494505495e-06, | |
| "logits/chosen": -2.997545003890991, | |
| "logits/rejected": -3.094526767730713, | |
| "logps/chosen": -299.0417785644531, | |
| "logps/rejected": -316.5038146972656, | |
| "loss": 0.4866, | |
| "rewards/accuracies": 0.7500000596046448, | |
| "rewards/chosen": -0.06449007242918015, | |
| "rewards/margins": 1.0651543140411377, | |
| "rewards/rejected": -1.1296443939208984, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.7301275760549558, | |
| "grad_norm": 4.44005823135376, | |
| "learning_rate": 5.4160125588697024e-06, | |
| "logits/chosen": -3.0456299781799316, | |
| "logits/rejected": -3.024275541305542, | |
| "logps/chosen": -305.1586608886719, | |
| "logps/rejected": -291.7957763671875, | |
| "loss": 0.4836, | |
| "rewards/accuracies": 0.7583333849906921, | |
| "rewards/chosen": -0.02189583331346512, | |
| "rewards/margins": 0.9891737103462219, | |
| "rewards/rejected": -1.011069655418396, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.7340529931305201, | |
| "grad_norm": 5.462418556213379, | |
| "learning_rate": 5.33751962323391e-06, | |
| "logits/chosen": -2.901125907897949, | |
| "logits/rejected": -2.9909684658050537, | |
| "logps/chosen": -326.17791748046875, | |
| "logps/rejected": -334.52166748046875, | |
| "loss": 0.5115, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.09602449834346771, | |
| "rewards/margins": 0.9841717481613159, | |
| "rewards/rejected": -1.0801963806152344, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.7379784102060843, | |
| "grad_norm": 4.357146739959717, | |
| "learning_rate": 5.259026687598117e-06, | |
| "logits/chosen": -3.0265889167785645, | |
| "logits/rejected": -3.0476319789886475, | |
| "logps/chosen": -298.66705322265625, | |
| "logps/rejected": -311.0827331542969, | |
| "loss": 0.4936, | |
| "rewards/accuracies": 0.7625000476837158, | |
| "rewards/chosen": -0.18625633418560028, | |
| "rewards/margins": 0.9413064122200012, | |
| "rewards/rejected": -1.1275627613067627, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.7419038272816487, | |
| "grad_norm": 5.111855983734131, | |
| "learning_rate": 5.180533751962323e-06, | |
| "logits/chosen": -3.0872814655303955, | |
| "logits/rejected": -3.0954253673553467, | |
| "logps/chosen": -331.3386535644531, | |
| "logps/rejected": -317.2918701171875, | |
| "loss": 0.5034, | |
| "rewards/accuracies": 0.7541667222976685, | |
| "rewards/chosen": -0.17156757414340973, | |
| "rewards/margins": 1.000931978225708, | |
| "rewards/rejected": -1.172499418258667, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.745829244357213, | |
| "grad_norm": 5.880111217498779, | |
| "learning_rate": 5.1020408163265315e-06, | |
| "logits/chosen": -2.9598050117492676, | |
| "logits/rejected": -3.0074758529663086, | |
| "logps/chosen": -311.2569274902344, | |
| "logps/rejected": -295.6765441894531, | |
| "loss": 0.5429, | |
| "rewards/accuracies": 0.6750000715255737, | |
| "rewards/chosen": -0.17758509516716003, | |
| "rewards/margins": 0.9686153531074524, | |
| "rewards/rejected": -1.14620041847229, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.7497546614327772, | |
| "grad_norm": 4.473018169403076, | |
| "learning_rate": 5.023547880690738e-06, | |
| "logits/chosen": -2.981584072113037, | |
| "logits/rejected": -3.0066146850585938, | |
| "logps/chosen": -322.27655029296875, | |
| "logps/rejected": -333.5732421875, | |
| "loss": 0.4551, | |
| "rewards/accuracies": 0.7541667222976685, | |
| "rewards/chosen": -0.10117676109075546, | |
| "rewards/margins": 1.1731947660446167, | |
| "rewards/rejected": -1.2743713855743408, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.7536800785083415, | |
| "grad_norm": 5.427559852600098, | |
| "learning_rate": 4.945054945054946e-06, | |
| "logits/chosen": -3.038059949874878, | |
| "logits/rejected": -3.0491480827331543, | |
| "logps/chosen": -304.99285888671875, | |
| "logps/rejected": -331.29669189453125, | |
| "loss": 0.5048, | |
| "rewards/accuracies": 0.7541667222976685, | |
| "rewards/chosen": -0.1531282365322113, | |
| "rewards/margins": 1.0113624334335327, | |
| "rewards/rejected": -1.1644906997680664, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.7576054955839058, | |
| "grad_norm": 5.369307994842529, | |
| "learning_rate": 4.866562009419153e-06, | |
| "logits/chosen": -3.041508197784424, | |
| "logits/rejected": -3.017305374145508, | |
| "logps/chosen": -309.01409912109375, | |
| "logps/rejected": -324.8525390625, | |
| "loss": 0.468, | |
| "rewards/accuracies": 0.7750000357627869, | |
| "rewards/chosen": -0.14672012627124786, | |
| "rewards/margins": 1.1099258661270142, | |
| "rewards/rejected": -1.256645917892456, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.76153091265947, | |
| "grad_norm": 4.683850288391113, | |
| "learning_rate": 4.7880690737833605e-06, | |
| "logits/chosen": -3.0513949394226074, | |
| "logits/rejected": -3.1184747219085693, | |
| "logps/chosen": -349.2635192871094, | |
| "logps/rejected": -324.3399963378906, | |
| "loss": 0.54, | |
| "rewards/accuracies": 0.7125000357627869, | |
| "rewards/chosen": -0.11181743443012238, | |
| "rewards/margins": 0.9319450259208679, | |
| "rewards/rejected": -1.0437625646591187, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.7654563297350343, | |
| "grad_norm": 4.057793617248535, | |
| "learning_rate": 4.709576138147567e-06, | |
| "logits/chosen": -3.0879323482513428, | |
| "logits/rejected": -3.1031229496002197, | |
| "logps/chosen": -345.81207275390625, | |
| "logps/rejected": -354.1678466796875, | |
| "loss": 0.4916, | |
| "rewards/accuracies": 0.7583333849906921, | |
| "rewards/chosen": 0.02626100741326809, | |
| "rewards/margins": 1.1445354223251343, | |
| "rewards/rejected": -1.118274450302124, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.7693817468105987, | |
| "grad_norm": 3.788588285446167, | |
| "learning_rate": 4.631083202511774e-06, | |
| "logits/chosen": -3.0559887886047363, | |
| "logits/rejected": -2.9645016193389893, | |
| "logps/chosen": -329.69366455078125, | |
| "logps/rejected": -323.6266174316406, | |
| "loss": 0.5673, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.006841002497822046, | |
| "rewards/margins": 1.0048836469650269, | |
| "rewards/rejected": -0.9980427026748657, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.7733071638861629, | |
| "grad_norm": 4.364587306976318, | |
| "learning_rate": 4.5525902668759815e-06, | |
| "logits/chosen": -2.923006296157837, | |
| "logits/rejected": -3.013233184814453, | |
| "logps/chosen": -316.62188720703125, | |
| "logps/rejected": -303.6081848144531, | |
| "loss": 0.5261, | |
| "rewards/accuracies": 0.7083333730697632, | |
| "rewards/chosen": -0.10443178564310074, | |
| "rewards/margins": 1.0743087530136108, | |
| "rewards/rejected": -1.1787405014038086, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.7772325809617272, | |
| "grad_norm": 5.203098773956299, | |
| "learning_rate": 4.474097331240189e-06, | |
| "logits/chosen": -3.0155367851257324, | |
| "logits/rejected": -3.095081090927124, | |
| "logps/chosen": -320.33074951171875, | |
| "logps/rejected": -339.04791259765625, | |
| "loss": 0.5602, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.04893790930509567, | |
| "rewards/margins": 1.0242021083831787, | |
| "rewards/rejected": -0.9752641916275024, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.7811579980372915, | |
| "grad_norm": 5.797107219696045, | |
| "learning_rate": 4.395604395604396e-06, | |
| "logits/chosen": -2.9568419456481934, | |
| "logits/rejected": -3.061304807662964, | |
| "logps/chosen": -323.64459228515625, | |
| "logps/rejected": -306.8099060058594, | |
| "loss": 0.4912, | |
| "rewards/accuracies": 0.7458333969116211, | |
| "rewards/chosen": -0.03198995441198349, | |
| "rewards/margins": 1.1657757759094238, | |
| "rewards/rejected": -1.1977657079696655, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.7850834151128557, | |
| "grad_norm": 3.8195958137512207, | |
| "learning_rate": 4.317111459968603e-06, | |
| "logits/chosen": -2.9896445274353027, | |
| "logits/rejected": -2.980637550354004, | |
| "logps/chosen": -349.469482421875, | |
| "logps/rejected": -325.8355712890625, | |
| "loss": 0.5145, | |
| "rewards/accuracies": 0.7583333849906921, | |
| "rewards/chosen": 0.16838806867599487, | |
| "rewards/margins": 1.0479358434677124, | |
| "rewards/rejected": -0.8795478940010071, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7850834151128557, | |
| "eval_logits/chosen": -3.02506422996521, | |
| "eval_logits/rejected": -3.0537216663360596, | |
| "eval_logps/chosen": -328.9156188964844, | |
| "eval_logps/rejected": -322.50213623046875, | |
| "eval_loss": 0.48850810527801514, | |
| "eval_rewards/accuracies": 0.7524999976158142, | |
| "eval_rewards/chosen": 0.0447828434407711, | |
| "eval_rewards/margins": 1.0521847009658813, | |
| "eval_rewards/rejected": -1.0074018239974976, | |
| "eval_runtime": 170.8761, | |
| "eval_samples_per_second": 11.704, | |
| "eval_steps_per_second": 5.852, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.78900883218842, | |
| "grad_norm": 3.5479438304901123, | |
| "learning_rate": 4.2386185243328105e-06, | |
| "logits/chosen": -2.979447841644287, | |
| "logits/rejected": -3.07000994682312, | |
| "logps/chosen": -329.8793640136719, | |
| "logps/rejected": -326.6648864746094, | |
| "loss": 0.5044, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.15325433015823364, | |
| "rewards/margins": 1.0215200185775757, | |
| "rewards/rejected": -0.8682657480239868, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.7929342492639843, | |
| "grad_norm": 5.247315883636475, | |
| "learning_rate": 4.160125588697018e-06, | |
| "logits/chosen": -2.996269702911377, | |
| "logits/rejected": -3.06060528755188, | |
| "logps/chosen": -315.26666259765625, | |
| "logps/rejected": -309.08868408203125, | |
| "loss": 0.5036, | |
| "rewards/accuracies": 0.7750000953674316, | |
| "rewards/chosen": 0.1521468460559845, | |
| "rewards/margins": 1.0418260097503662, | |
| "rewards/rejected": -0.8896790742874146, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.7968596663395485, | |
| "grad_norm": 4.7451324462890625, | |
| "learning_rate": 4.081632653061225e-06, | |
| "logits/chosen": -3.060560464859009, | |
| "logits/rejected": -3.1156129837036133, | |
| "logps/chosen": -307.20135498046875, | |
| "logps/rejected": -289.0655212402344, | |
| "loss": 0.5071, | |
| "rewards/accuracies": 0.7666667103767395, | |
| "rewards/chosen": -0.054080985486507416, | |
| "rewards/margins": 1.019335150718689, | |
| "rewards/rejected": -1.0734161138534546, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.8007850834151129, | |
| "grad_norm": 4.9453325271606445, | |
| "learning_rate": 4.003139717425432e-06, | |
| "logits/chosen": -3.077300548553467, | |
| "logits/rejected": -3.1319777965545654, | |
| "logps/chosen": -320.95050048828125, | |
| "logps/rejected": -289.74591064453125, | |
| "loss": 0.5736, | |
| "rewards/accuracies": 0.6958333849906921, | |
| "rewards/chosen": 0.06706535816192627, | |
| "rewards/margins": 0.8430485725402832, | |
| "rewards/rejected": -0.7759832143783569, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.8047105004906772, | |
| "grad_norm": 5.086551189422607, | |
| "learning_rate": 3.9246467817896395e-06, | |
| "logits/chosen": -2.9904251098632812, | |
| "logits/rejected": -3.015242099761963, | |
| "logps/chosen": -348.0818176269531, | |
| "logps/rejected": -313.29937744140625, | |
| "loss": 0.5274, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.15665733814239502, | |
| "rewards/margins": 1.0903061628341675, | |
| "rewards/rejected": -0.933648943901062, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.8086359175662414, | |
| "grad_norm": 3.6982407569885254, | |
| "learning_rate": 3.846153846153847e-06, | |
| "logits/chosen": -2.8871021270751953, | |
| "logits/rejected": -3.033609390258789, | |
| "logps/chosen": -319.4637145996094, | |
| "logps/rejected": -308.5705871582031, | |
| "loss": 0.4841, | |
| "rewards/accuracies": 0.720833420753479, | |
| "rewards/chosen": 0.15460513532161713, | |
| "rewards/margins": 1.0347096920013428, | |
| "rewards/rejected": -0.8801045417785645, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.8125613346418057, | |
| "grad_norm": 4.721177101135254, | |
| "learning_rate": 3.767660910518054e-06, | |
| "logits/chosen": -3.060762882232666, | |
| "logits/rejected": -3.0055036544799805, | |
| "logps/chosen": -320.2731628417969, | |
| "logps/rejected": -321.6552734375, | |
| "loss": 0.5038, | |
| "rewards/accuracies": 0.7666667699813843, | |
| "rewards/chosen": 0.1908079981803894, | |
| "rewards/margins": 0.9346101880073547, | |
| "rewards/rejected": -0.7438021302223206, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.81648675171737, | |
| "grad_norm": 3.8394100666046143, | |
| "learning_rate": 3.6891679748822605e-06, | |
| "logits/chosen": -3.0200486183166504, | |
| "logits/rejected": -3.0659079551696777, | |
| "logps/chosen": -309.3419494628906, | |
| "logps/rejected": -303.4842834472656, | |
| "loss": 0.4625, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": 0.19585837423801422, | |
| "rewards/margins": 1.1063092947006226, | |
| "rewards/rejected": -0.9104509353637695, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.8204121687929342, | |
| "grad_norm": 4.434594631195068, | |
| "learning_rate": 3.6106750392464677e-06, | |
| "logits/chosen": -3.118605136871338, | |
| "logits/rejected": -3.0567848682403564, | |
| "logps/chosen": -320.66400146484375, | |
| "logps/rejected": -327.2590637207031, | |
| "loss": 0.5165, | |
| "rewards/accuracies": 0.7208333611488342, | |
| "rewards/chosen": 0.15826813876628876, | |
| "rewards/margins": 0.8408550024032593, | |
| "rewards/rejected": -0.6825869083404541, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.8243375858684985, | |
| "grad_norm": 5.180295467376709, | |
| "learning_rate": 3.532182103610675e-06, | |
| "logits/chosen": -3.04954195022583, | |
| "logits/rejected": -3.0940709114074707, | |
| "logps/chosen": -333.8227233886719, | |
| "logps/rejected": -317.74530029296875, | |
| "loss": 0.5001, | |
| "rewards/accuracies": 0.7416666746139526, | |
| "rewards/chosen": 0.304600328207016, | |
| "rewards/margins": 1.0906543731689453, | |
| "rewards/rejected": -0.7860540151596069, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.8282630029440629, | |
| "grad_norm": 3.821493148803711, | |
| "learning_rate": 3.4536891679748822e-06, | |
| "logits/chosen": -2.9625072479248047, | |
| "logits/rejected": -3.038464069366455, | |
| "logps/chosen": -317.5421142578125, | |
| "logps/rejected": -318.5979309082031, | |
| "loss": 0.4845, | |
| "rewards/accuracies": 0.7541667222976685, | |
| "rewards/chosen": 0.11151299625635147, | |
| "rewards/margins": 1.0481318235397339, | |
| "rewards/rejected": -0.9366186857223511, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.8321884200196271, | |
| "grad_norm": 5.562465190887451, | |
| "learning_rate": 3.3751962323390895e-06, | |
| "logits/chosen": -3.0714111328125, | |
| "logits/rejected": -3.0893895626068115, | |
| "logps/chosen": -363.3538513183594, | |
| "logps/rejected": -348.7355651855469, | |
| "loss": 0.487, | |
| "rewards/accuracies": 0.7458333969116211, | |
| "rewards/chosen": 0.14830578863620758, | |
| "rewards/margins": 1.0616153478622437, | |
| "rewards/rejected": -0.9133096933364868, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.8361138370951914, | |
| "grad_norm": 4.553463459014893, | |
| "learning_rate": 3.2967032967032968e-06, | |
| "logits/chosen": -2.939422845840454, | |
| "logits/rejected": -2.907299518585205, | |
| "logps/chosen": -330.71014404296875, | |
| "logps/rejected": -317.1188049316406, | |
| "loss": 0.5002, | |
| "rewards/accuracies": 0.7583333253860474, | |
| "rewards/chosen": 0.015831544995307922, | |
| "rewards/margins": 1.0325143337249756, | |
| "rewards/rejected": -1.016682744026184, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.8400392541707556, | |
| "grad_norm": 3.9285788536071777, | |
| "learning_rate": 3.218210361067504e-06, | |
| "logits/chosen": -2.965064287185669, | |
| "logits/rejected": -3.0384631156921387, | |
| "logps/chosen": -334.41192626953125, | |
| "logps/rejected": -332.02899169921875, | |
| "loss": 0.4383, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": 0.18648472428321838, | |
| "rewards/margins": 1.2396255731582642, | |
| "rewards/rejected": -1.0531408786773682, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.8439646712463199, | |
| "grad_norm": 3.849515676498413, | |
| "learning_rate": 3.1397174254317113e-06, | |
| "logits/chosen": -3.0192911624908447, | |
| "logits/rejected": -3.064319610595703, | |
| "logps/chosen": -291.82257080078125, | |
| "logps/rejected": -276.48486328125, | |
| "loss": 0.5327, | |
| "rewards/accuracies": 0.7166666984558105, | |
| "rewards/chosen": -0.07870586216449738, | |
| "rewards/margins": 0.9807574152946472, | |
| "rewards/rejected": -1.059463381767273, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.8478900883218842, | |
| "grad_norm": 4.2341814041137695, | |
| "learning_rate": 3.0612244897959185e-06, | |
| "logits/chosen": -3.021660566329956, | |
| "logits/rejected": -3.0418386459350586, | |
| "logps/chosen": -313.7025451660156, | |
| "logps/rejected": -316.4256591796875, | |
| "loss": 0.5029, | |
| "rewards/accuracies": 0.7416666746139526, | |
| "rewards/chosen": 0.049659062176942825, | |
| "rewards/margins": 0.9659037590026855, | |
| "rewards/rejected": -0.9162446856498718, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.8518155053974484, | |
| "grad_norm": 3.874643564224243, | |
| "learning_rate": 2.982731554160126e-06, | |
| "logits/chosen": -3.0091512203216553, | |
| "logits/rejected": -3.06247615814209, | |
| "logps/chosen": -333.6044616699219, | |
| "logps/rejected": -305.1171875, | |
| "loss": 0.5009, | |
| "rewards/accuracies": 0.7291666865348816, | |
| "rewards/chosen": 0.03525074943900108, | |
| "rewards/margins": 1.0619693994522095, | |
| "rewards/rejected": -1.0267184972763062, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.8557409224730128, | |
| "grad_norm": 4.802616119384766, | |
| "learning_rate": 2.904238618524333e-06, | |
| "logits/chosen": -3.0582773685455322, | |
| "logits/rejected": -3.0825817584991455, | |
| "logps/chosen": -356.7900695800781, | |
| "logps/rejected": -343.6094665527344, | |
| "loss": 0.4828, | |
| "rewards/accuracies": 0.7583333849906921, | |
| "rewards/chosen": 0.11294318735599518, | |
| "rewards/margins": 1.103849172592163, | |
| "rewards/rejected": -0.9909059405326843, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.8596663395485771, | |
| "grad_norm": 4.6176838874816895, | |
| "learning_rate": 2.8257456828885403e-06, | |
| "logits/chosen": -2.992724895477295, | |
| "logits/rejected": -3.021177053451538, | |
| "logps/chosen": -326.8610534667969, | |
| "logps/rejected": -327.75445556640625, | |
| "loss": 0.3941, | |
| "rewards/accuracies": 0.8166667222976685, | |
| "rewards/chosen": 0.09250589460134506, | |
| "rewards/margins": 1.3396714925765991, | |
| "rewards/rejected": -1.247165560722351, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.8635917566241413, | |
| "grad_norm": 4.353200912475586, | |
| "learning_rate": 2.7472527472527476e-06, | |
| "logits/chosen": -3.025132656097412, | |
| "logits/rejected": -3.075371503829956, | |
| "logps/chosen": -323.29315185546875, | |
| "logps/rejected": -304.34423828125, | |
| "loss": 0.456, | |
| "rewards/accuracies": 0.7375000715255737, | |
| "rewards/chosen": 0.09489820152521133, | |
| "rewards/margins": 1.147526741027832, | |
| "rewards/rejected": -1.0526283979415894, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8635917566241413, | |
| "eval_logits/chosen": -3.0234742164611816, | |
| "eval_logits/rejected": -3.051736354827881, | |
| "eval_logps/chosen": -328.9091491699219, | |
| "eval_logps/rejected": -322.8738098144531, | |
| "eval_loss": 0.49041956663131714, | |
| "eval_rewards/accuracies": 0.7534999847412109, | |
| "eval_rewards/chosen": 0.04543456435203552, | |
| "eval_rewards/margins": 1.0900031328201294, | |
| "eval_rewards/rejected": -1.044568657875061, | |
| "eval_runtime": 170.9675, | |
| "eval_samples_per_second": 11.698, | |
| "eval_steps_per_second": 5.849, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8675171736997056, | |
| "grad_norm": 3.9487624168395996, | |
| "learning_rate": 2.668759811616955e-06, | |
| "logits/chosen": -2.9902524948120117, | |
| "logits/rejected": -3.033686876296997, | |
| "logps/chosen": -304.41265869140625, | |
| "logps/rejected": -300.32049560546875, | |
| "loss": 0.4872, | |
| "rewards/accuracies": 0.7416666746139526, | |
| "rewards/chosen": 0.16270777583122253, | |
| "rewards/margins": 1.078561544418335, | |
| "rewards/rejected": -0.9158536195755005, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.8714425907752699, | |
| "grad_norm": 5.130923748016357, | |
| "learning_rate": 2.5902668759811617e-06, | |
| "logits/chosen": -2.932274580001831, | |
| "logits/rejected": -2.995884656906128, | |
| "logps/chosen": -318.7715148925781, | |
| "logps/rejected": -321.5521240234375, | |
| "loss": 0.5181, | |
| "rewards/accuracies": 0.7000000476837158, | |
| "rewards/chosen": -0.003421901259571314, | |
| "rewards/margins": 1.0905206203460693, | |
| "rewards/rejected": -1.0939425230026245, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.8753680078508341, | |
| "grad_norm": 5.341976642608643, | |
| "learning_rate": 2.511773940345369e-06, | |
| "logits/chosen": -2.9918906688690186, | |
| "logits/rejected": -3.070976734161377, | |
| "logps/chosen": -328.22149658203125, | |
| "logps/rejected": -295.317138671875, | |
| "loss": 0.5221, | |
| "rewards/accuracies": 0.7583333253860474, | |
| "rewards/chosen": 0.2609195113182068, | |
| "rewards/margins": 1.2272056341171265, | |
| "rewards/rejected": -0.9662860631942749, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.8792934249263984, | |
| "grad_norm": 3.9076426029205322, | |
| "learning_rate": 2.4332810047095766e-06, | |
| "logits/chosen": -3.0119221210479736, | |
| "logits/rejected": -3.0435667037963867, | |
| "logps/chosen": -345.94036865234375, | |
| "logps/rejected": -323.3532409667969, | |
| "loss": 0.4533, | |
| "rewards/accuracies": 0.7750000357627869, | |
| "rewards/chosen": 0.23655609786510468, | |
| "rewards/margins": 1.205311894416809, | |
| "rewards/rejected": -0.9687557220458984, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.8832188420019627, | |
| "grad_norm": 4.00649356842041, | |
| "learning_rate": 2.3547880690737835e-06, | |
| "logits/chosen": -3.0578651428222656, | |
| "logits/rejected": -3.1050515174865723, | |
| "logps/chosen": -293.8743896484375, | |
| "logps/rejected": -334.2582702636719, | |
| "loss": 0.5158, | |
| "rewards/accuracies": 0.7500000596046448, | |
| "rewards/chosen": 0.27813708782196045, | |
| "rewards/margins": 1.1314489841461182, | |
| "rewards/rejected": -0.8533117175102234, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.887144259077527, | |
| "grad_norm": 5.170398712158203, | |
| "learning_rate": 2.2762951334379907e-06, | |
| "logits/chosen": -2.9842798709869385, | |
| "logits/rejected": -3.0326454639434814, | |
| "logps/chosen": -319.64111328125, | |
| "logps/rejected": -334.63641357421875, | |
| "loss": 0.5767, | |
| "rewards/accuracies": 0.6958334445953369, | |
| "rewards/chosen": 0.04911806434392929, | |
| "rewards/margins": 1.029329538345337, | |
| "rewards/rejected": -0.9802114367485046, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.8910696761530913, | |
| "grad_norm": 4.750176906585693, | |
| "learning_rate": 2.197802197802198e-06, | |
| "logits/chosen": -3.024641275405884, | |
| "logits/rejected": -3.0461010932922363, | |
| "logps/chosen": -344.88226318359375, | |
| "logps/rejected": -349.8857727050781, | |
| "loss": 0.4812, | |
| "rewards/accuracies": 0.7500000596046448, | |
| "rewards/chosen": 0.08117427676916122, | |
| "rewards/margins": 1.2805150747299194, | |
| "rewards/rejected": -1.1993409395217896, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.8949950932286556, | |
| "grad_norm": 4.3012471199035645, | |
| "learning_rate": 2.1193092621664052e-06, | |
| "logits/chosen": -2.9693052768707275, | |
| "logits/rejected": -3.012446165084839, | |
| "logps/chosen": -332.40740966796875, | |
| "logps/rejected": -347.2129211425781, | |
| "loss": 0.4836, | |
| "rewards/accuracies": 0.783333420753479, | |
| "rewards/chosen": 0.11985665559768677, | |
| "rewards/margins": 1.132505178451538, | |
| "rewards/rejected": -1.012648582458496, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.8989205103042198, | |
| "grad_norm": 4.1170196533203125, | |
| "learning_rate": 2.0408163265306125e-06, | |
| "logits/chosen": -2.9982786178588867, | |
| "logits/rejected": -2.985097646713257, | |
| "logps/chosen": -325.58453369140625, | |
| "logps/rejected": -318.0592346191406, | |
| "loss": 0.4191, | |
| "rewards/accuracies": 0.8125001192092896, | |
| "rewards/chosen": 0.2217942774295807, | |
| "rewards/margins": 1.308205485343933, | |
| "rewards/rejected": -1.0864112377166748, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.9028459273797841, | |
| "grad_norm": 6.188891887664795, | |
| "learning_rate": 1.9623233908948198e-06, | |
| "logits/chosen": -2.999929189682007, | |
| "logits/rejected": -3.010659694671631, | |
| "logps/chosen": -335.7912902832031, | |
| "logps/rejected": -308.9612731933594, | |
| "loss": 0.4935, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.025047356262803078, | |
| "rewards/margins": 1.0598541498184204, | |
| "rewards/rejected": -1.0849015712738037, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.9067713444553483, | |
| "grad_norm": 4.374786376953125, | |
| "learning_rate": 1.883830455259027e-06, | |
| "logits/chosen": -2.9940290451049805, | |
| "logits/rejected": -3.0808780193328857, | |
| "logps/chosen": -332.26287841796875, | |
| "logps/rejected": -301.36590576171875, | |
| "loss": 0.5517, | |
| "rewards/accuracies": 0.7166666984558105, | |
| "rewards/chosen": 0.07976453751325607, | |
| "rewards/margins": 0.9496763348579407, | |
| "rewards/rejected": -0.8699118494987488, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.9106967615309126, | |
| "grad_norm": 5.303534030914307, | |
| "learning_rate": 1.8053375196232339e-06, | |
| "logits/chosen": -2.9458823204040527, | |
| "logits/rejected": -3.05169939994812, | |
| "logps/chosen": -350.2687072753906, | |
| "logps/rejected": -344.89892578125, | |
| "loss": 0.529, | |
| "rewards/accuracies": 0.7041667103767395, | |
| "rewards/chosen": -0.006747332401573658, | |
| "rewards/margins": 1.0622098445892334, | |
| "rewards/rejected": -1.0689570903778076, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.914622178606477, | |
| "grad_norm": 4.187100887298584, | |
| "learning_rate": 1.7268445839874411e-06, | |
| "logits/chosen": -3.0305941104888916, | |
| "logits/rejected": -3.0799167156219482, | |
| "logps/chosen": -311.58660888671875, | |
| "logps/rejected": -297.4324035644531, | |
| "loss": 0.5227, | |
| "rewards/accuracies": 0.73333340883255, | |
| "rewards/chosen": 0.021794170141220093, | |
| "rewards/margins": 0.9995294809341431, | |
| "rewards/rejected": -0.977735161781311, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.9185475956820413, | |
| "grad_norm": 3.8420519828796387, | |
| "learning_rate": 1.6483516483516484e-06, | |
| "logits/chosen": -3.073319911956787, | |
| "logits/rejected": -3.1020355224609375, | |
| "logps/chosen": -323.134521484375, | |
| "logps/rejected": -314.55340576171875, | |
| "loss": 0.453, | |
| "rewards/accuracies": 0.7666667699813843, | |
| "rewards/chosen": 0.005049190018326044, | |
| "rewards/margins": 1.1697251796722412, | |
| "rewards/rejected": -1.1646759510040283, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.9224730127576055, | |
| "grad_norm": 4.952281951904297, | |
| "learning_rate": 1.5698587127158556e-06, | |
| "logits/chosen": -2.9573421478271484, | |
| "logits/rejected": -3.016396999359131, | |
| "logps/chosen": -336.82427978515625, | |
| "logps/rejected": -305.33697509765625, | |
| "loss": 0.4634, | |
| "rewards/accuracies": 0.7416666746139526, | |
| "rewards/chosen": 0.049565743654966354, | |
| "rewards/margins": 1.13016676902771, | |
| "rewards/rejected": -1.0806009769439697, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.9263984298331698, | |
| "grad_norm": 4.881412029266357, | |
| "learning_rate": 1.491365777080063e-06, | |
| "logits/chosen": -3.013920783996582, | |
| "logits/rejected": -3.0628743171691895, | |
| "logps/chosen": -325.85760498046875, | |
| "logps/rejected": -325.6579284667969, | |
| "loss": 0.5157, | |
| "rewards/accuracies": 0.7375000715255737, | |
| "rewards/chosen": -0.006006541661918163, | |
| "rewards/margins": 1.0770210027694702, | |
| "rewards/rejected": -1.0830276012420654, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.930323846908734, | |
| "grad_norm": 4.166913986206055, | |
| "learning_rate": 1.4128728414442702e-06, | |
| "logits/chosen": -3.0066945552825928, | |
| "logits/rejected": -3.021183490753174, | |
| "logps/chosen": -298.1296081542969, | |
| "logps/rejected": -306.4847106933594, | |
| "loss": 0.4665, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": 0.030468011274933815, | |
| "rewards/margins": 1.1519229412078857, | |
| "rewards/rejected": -1.1214549541473389, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.9342492639842983, | |
| "grad_norm": 5.559418678283691, | |
| "learning_rate": 1.3343799058084774e-06, | |
| "logits/chosen": -3.032975912094116, | |
| "logits/rejected": -3.018131732940674, | |
| "logps/chosen": -299.36419677734375, | |
| "logps/rejected": -299.8133239746094, | |
| "loss": 0.5393, | |
| "rewards/accuracies": 0.6875000596046448, | |
| "rewards/chosen": 0.018437325954437256, | |
| "rewards/margins": 0.9440910220146179, | |
| "rewards/rejected": -0.9256537556648254, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.9381746810598626, | |
| "grad_norm": 5.020077228546143, | |
| "learning_rate": 1.2558869701726845e-06, | |
| "logits/chosen": -2.986281156539917, | |
| "logits/rejected": -3.06579852104187, | |
| "logps/chosen": -341.192626953125, | |
| "logps/rejected": -319.23809814453125, | |
| "loss": 0.5241, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.07408008724451065, | |
| "rewards/margins": 1.0439367294311523, | |
| "rewards/rejected": -1.1180168390274048, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.9421000981354269, | |
| "grad_norm": 4.814427375793457, | |
| "learning_rate": 1.1773940345368917e-06, | |
| "logits/chosen": -3.0177111625671387, | |
| "logits/rejected": -3.044379711151123, | |
| "logps/chosen": -326.84661865234375, | |
| "logps/rejected": -306.0335388183594, | |
| "loss": 0.4989, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.0778479278087616, | |
| "rewards/margins": 1.1605087518692017, | |
| "rewards/rejected": -1.2383568286895752, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.9421000981354269, | |
| "eval_logits/chosen": -3.0254786014556885, | |
| "eval_logits/rejected": -3.0536904335021973, | |
| "eval_logps/chosen": -329.79644775390625, | |
| "eval_logps/rejected": -323.7355651855469, | |
| "eval_loss": 0.4862891137599945, | |
| "eval_rewards/accuracies": 0.7605000138282776, | |
| "eval_rewards/chosen": -0.043296121060848236, | |
| "eval_rewards/margins": 1.0874476432800293, | |
| "eval_rewards/rejected": -1.1307436227798462, | |
| "eval_runtime": 170.5812, | |
| "eval_samples_per_second": 11.725, | |
| "eval_steps_per_second": 5.862, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.9460255152109912, | |
| "grad_norm": 4.624739646911621, | |
| "learning_rate": 1.098901098901099e-06, | |
| "logits/chosen": -3.055946111679077, | |
| "logits/rejected": -3.1179323196411133, | |
| "logps/chosen": -363.6905822753906, | |
| "logps/rejected": -346.05767822265625, | |
| "loss": 0.4542, | |
| "rewards/accuracies": 0.7750000357627869, | |
| "rewards/chosen": -0.008908344432711601, | |
| "rewards/margins": 1.0937221050262451, | |
| "rewards/rejected": -1.102630376815796, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.9499509322865555, | |
| "grad_norm": 3.9496641159057617, | |
| "learning_rate": 1.0204081632653063e-06, | |
| "logits/chosen": -2.9469313621520996, | |
| "logits/rejected": -3.045012950897217, | |
| "logps/chosen": -328.9994201660156, | |
| "logps/rejected": -339.7306213378906, | |
| "loss": 0.4544, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.06163526326417923, | |
| "rewards/margins": 1.2660752534866333, | |
| "rewards/rejected": -1.327710509300232, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.9538763493621197, | |
| "grad_norm": 4.550204753875732, | |
| "learning_rate": 9.419152276295135e-07, | |
| "logits/chosen": -3.0455386638641357, | |
| "logits/rejected": -3.007603168487549, | |
| "logps/chosen": -321.6459655761719, | |
| "logps/rejected": -317.13592529296875, | |
| "loss": 0.518, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.07271427661180496, | |
| "rewards/margins": 1.1276568174362183, | |
| "rewards/rejected": -1.2003710269927979, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.957801766437684, | |
| "grad_norm": 3.7910659313201904, | |
| "learning_rate": 8.634222919937206e-07, | |
| "logits/chosen": -2.887434720993042, | |
| "logits/rejected": -3.0296366214752197, | |
| "logps/chosen": -328.447021484375, | |
| "logps/rejected": -312.25146484375, | |
| "loss": 0.5169, | |
| "rewards/accuracies": 0.7375000715255737, | |
| "rewards/chosen": -0.016386663541197777, | |
| "rewards/margins": 1.084718942642212, | |
| "rewards/rejected": -1.1011055707931519, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.9617271835132483, | |
| "grad_norm": 4.135540008544922, | |
| "learning_rate": 7.849293563579278e-07, | |
| "logits/chosen": -3.051018714904785, | |
| "logits/rejected": -3.042524814605713, | |
| "logps/chosen": -303.7222900390625, | |
| "logps/rejected": -311.6209411621094, | |
| "loss": 0.4563, | |
| "rewards/accuracies": 0.7625001072883606, | |
| "rewards/chosen": 0.08631271123886108, | |
| "rewards/margins": 1.19536554813385, | |
| "rewards/rejected": -1.1090528964996338, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.9656526005888125, | |
| "grad_norm": 4.80025577545166, | |
| "learning_rate": 7.064364207221351e-07, | |
| "logits/chosen": -3.0100882053375244, | |
| "logits/rejected": -3.033210039138794, | |
| "logps/chosen": -297.29388427734375, | |
| "logps/rejected": -312.75390625, | |
| "loss": 0.5043, | |
| "rewards/accuracies": 0.7416667342185974, | |
| "rewards/chosen": -0.07621364295482635, | |
| "rewards/margins": 0.9946663975715637, | |
| "rewards/rejected": -1.0708800554275513, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.9695780176643768, | |
| "grad_norm": 4.171872138977051, | |
| "learning_rate": 6.279434850863422e-07, | |
| "logits/chosen": -2.984192371368408, | |
| "logits/rejected": -2.9866414070129395, | |
| "logps/chosen": -315.4156799316406, | |
| "logps/rejected": -311.60174560546875, | |
| "loss": 0.5081, | |
| "rewards/accuracies": 0.7291667461395264, | |
| "rewards/chosen": -0.02722536399960518, | |
| "rewards/margins": 1.0488277673721313, | |
| "rewards/rejected": -1.0760531425476074, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.9735034347399412, | |
| "grad_norm": 4.678730487823486, | |
| "learning_rate": 5.494505494505495e-07, | |
| "logits/chosen": -3.045713424682617, | |
| "logits/rejected": -3.0048978328704834, | |
| "logps/chosen": -355.97918701171875, | |
| "logps/rejected": -362.16143798828125, | |
| "loss": 0.4872, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.0551709420979023, | |
| "rewards/margins": 1.1289502382278442, | |
| "rewards/rejected": -1.184121012687683, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.9774288518155054, | |
| "grad_norm": 5.555654048919678, | |
| "learning_rate": 4.7095761381475676e-07, | |
| "logits/chosen": -3.045968532562256, | |
| "logits/rejected": -3.028806686401367, | |
| "logps/chosen": -362.6111755371094, | |
| "logps/rejected": -342.56658935546875, | |
| "loss": 0.4904, | |
| "rewards/accuracies": 0.7916666865348816, | |
| "rewards/chosen": -0.12567153573036194, | |
| "rewards/margins": 1.1269028186798096, | |
| "rewards/rejected": -1.2525743246078491, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.9813542688910697, | |
| "grad_norm": 4.789409637451172, | |
| "learning_rate": 3.924646781789639e-07, | |
| "logits/chosen": -2.9453186988830566, | |
| "logits/rejected": -3.032778739929199, | |
| "logps/chosen": -335.04803466796875, | |
| "logps/rejected": -338.70782470703125, | |
| "loss": 0.5291, | |
| "rewards/accuracies": 0.7208333611488342, | |
| "rewards/chosen": -0.0959320068359375, | |
| "rewards/margins": 1.0104596614837646, | |
| "rewards/rejected": -1.1063916683197021, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.985279685966634, | |
| "grad_norm": 4.851970672607422, | |
| "learning_rate": 3.139717425431711e-07, | |
| "logits/chosen": -3.0085787773132324, | |
| "logits/rejected": -3.033092975616455, | |
| "logps/chosen": -294.70330810546875, | |
| "logps/rejected": -298.2593994140625, | |
| "loss": 0.4952, | |
| "rewards/accuracies": 0.720833420753479, | |
| "rewards/chosen": -0.07039856910705566, | |
| "rewards/margins": 1.0396531820297241, | |
| "rewards/rejected": -1.1100517511367798, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.9892051030421982, | |
| "grad_norm": 4.407826900482178, | |
| "learning_rate": 2.3547880690737838e-07, | |
| "logits/chosen": -2.992248296737671, | |
| "logits/rejected": -3.090275287628174, | |
| "logps/chosen": -361.5509338378906, | |
| "logps/rejected": -348.03094482421875, | |
| "loss": 0.4522, | |
| "rewards/accuracies": 0.7833333611488342, | |
| "rewards/chosen": -0.02963084913790226, | |
| "rewards/margins": 1.1609312295913696, | |
| "rewards/rejected": -1.1905620098114014, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.9931305201177625, | |
| "grad_norm": 5.248498916625977, | |
| "learning_rate": 1.5698587127158556e-07, | |
| "logits/chosen": -3.013667345046997, | |
| "logits/rejected": -2.995178461074829, | |
| "logps/chosen": -306.033447265625, | |
| "logps/rejected": -311.38470458984375, | |
| "loss": 0.5122, | |
| "rewards/accuracies": 0.7666666507720947, | |
| "rewards/chosen": -0.04827199503779411, | |
| "rewards/margins": 1.0230185985565186, | |
| "rewards/rejected": -1.0712906122207642, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.9970559371933267, | |
| "grad_norm": 4.212546348571777, | |
| "learning_rate": 7.849293563579278e-08, | |
| "logits/chosen": -3.0183610916137695, | |
| "logits/rejected": -3.0684821605682373, | |
| "logps/chosen": -313.4974060058594, | |
| "logps/rejected": -312.05755615234375, | |
| "loss": 0.5292, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.12905262410640717, | |
| "rewards/margins": 0.9748676419258118, | |
| "rewards/rejected": -1.1039202213287354, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 1274, | |
| "total_flos": 0.0, | |
| "train_loss": 0.5089355802610868, | |
| "train_runtime": 12172.1578, | |
| "train_samples_per_second": 5.023, | |
| "train_steps_per_second": 0.105 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1274, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 6, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |