| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.988679245283019, |
| "eval_steps": 500, |
| "global_step": 396, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "grad_norm": 8.418300067215691, |
| "learning_rate": 1.25e-08, |
| "logps/chosen": -39.02219009399414, |
| "logps/rejected": -45.12399673461914, |
| "loss": 0.6931, |
| "losses/dpo": 0.6931471824645996, |
| "losses/sft": 1.552122950553894, |
| "losses/total": 0.6931471824645996, |
| "ref_logps/chosen": -39.02219009399414, |
| "ref_logps/rejected": -45.12399673461914, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 7.6721075942113535, |
| "learning_rate": 2.5e-08, |
| "logps/chosen": -37.21428680419922, |
| "logps/rejected": -44.4819221496582, |
| "loss": 0.6931, |
| "losses/dpo": 0.6931471824645996, |
| "losses/sft": 1.6663763523101807, |
| "losses/total": 0.6931471824645996, |
| "ref_logps/chosen": -37.21428680419922, |
| "ref_logps/rejected": -44.4819221496582, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 7.948340327346078, |
| "learning_rate": 3.75e-08, |
| "logps/chosen": -41.46142578125, |
| "logps/rejected": -52.18663024902344, |
| "loss": 0.6926, |
| "losses/dpo": 0.6867616176605225, |
| "losses/sft": 1.7890703678131104, |
| "losses/total": 0.6867616176605225, |
| "ref_logps/chosen": -41.46522903442383, |
| "ref_logps/rejected": -52.1768798828125, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": 0.0003804098814725876, |
| "rewards/margins": 0.0013556077610701323, |
| "rewards/rejected": -0.0009751979378052056, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 8.038993728778431, |
| "learning_rate": 5e-08, |
| "logps/chosen": -39.45478057861328, |
| "logps/rejected": -45.85334014892578, |
| "loss": 0.6936, |
| "losses/dpo": 0.6930198073387146, |
| "losses/sft": 1.6549196243286133, |
| "losses/total": 0.6930198073387146, |
| "ref_logps/chosen": -39.42698287963867, |
| "ref_logps/rejected": -45.83390426635742, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": -0.002779680071398616, |
| "rewards/margins": -0.0008358716731891036, |
| "rewards/rejected": -0.001943808514624834, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 7.749366133023054, |
| "learning_rate": 6.25e-08, |
| "logps/chosen": -44.29286193847656, |
| "logps/rejected": -51.08875274658203, |
| "loss": 0.695, |
| "losses/dpo": 0.6971508264541626, |
| "losses/sft": 1.3629276752471924, |
| "losses/total": 0.6971508264541626, |
| "ref_logps/chosen": -44.301361083984375, |
| "ref_logps/rejected": -51.132286071777344, |
| "rewards/accuracies": 0.4453125, |
| "rewards/chosen": 0.0008498989045619965, |
| "rewards/margins": -0.003503247397020459, |
| "rewards/rejected": 0.004353146068751812, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 7.545034174633802, |
| "learning_rate": 7.5e-08, |
| "logps/chosen": -37.192138671875, |
| "logps/rejected": -44.56536865234375, |
| "loss": 0.693, |
| "losses/dpo": 0.6901252269744873, |
| "losses/sft": 1.235260248184204, |
| "losses/total": 0.6901252269744873, |
| "ref_logps/chosen": -37.197486877441406, |
| "ref_logps/rejected": -44.56662368774414, |
| "rewards/accuracies": 0.46875, |
| "rewards/chosen": 0.000535178929567337, |
| "rewards/margins": 0.000409391475841403, |
| "rewards/rejected": 0.00012578748282976449, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 7.645026557219469, |
| "learning_rate": 8.75e-08, |
| "logps/chosen": -40.067909240722656, |
| "logps/rejected": -46.251487731933594, |
| "loss": 0.6941, |
| "losses/dpo": 0.695063054561615, |
| "losses/sft": 1.8211989402770996, |
| "losses/total": 0.695063054561615, |
| "ref_logps/chosen": -40.05988311767578, |
| "ref_logps/rejected": -46.26015090942383, |
| "rewards/accuracies": 0.453125, |
| "rewards/chosen": -0.0008021063404157758, |
| "rewards/margins": -0.001668928423896432, |
| "rewards/rejected": 0.000866822199895978, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 8.091132576285203, |
| "learning_rate": 1e-07, |
| "logps/chosen": -44.73344802856445, |
| "logps/rejected": -46.818824768066406, |
| "loss": 0.6949, |
| "losses/dpo": 0.6943204402923584, |
| "losses/sft": 1.470657229423523, |
| "losses/total": 0.6943204402923584, |
| "ref_logps/chosen": -44.7131233215332, |
| "ref_logps/rejected": -46.83186340332031, |
| "rewards/accuracies": 0.40625, |
| "rewards/chosen": -0.0020323917269706726, |
| "rewards/margins": -0.0033366940915584564, |
| "rewards/rejected": 0.0013043024810031056, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 7.987567478058529, |
| "learning_rate": 1.125e-07, |
| "logps/chosen": -40.069969177246094, |
| "logps/rejected": -50.12396240234375, |
| "loss": 0.6944, |
| "losses/dpo": 0.6880265474319458, |
| "losses/sft": 1.1660749912261963, |
| "losses/total": 0.6880265474319458, |
| "ref_logps/chosen": -40.05424118041992, |
| "ref_logps/rejected": -50.13201904296875, |
| "rewards/accuracies": 0.4765625, |
| "rewards/chosen": -0.0015729822916910052, |
| "rewards/margins": -0.002378995530307293, |
| "rewards/rejected": 0.000806013063993305, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 7.458190702935405, |
| "learning_rate": 1.25e-07, |
| "logps/chosen": -36.18950271606445, |
| "logps/rejected": -45.11402130126953, |
| "loss": 0.694, |
| "losses/dpo": 0.6928867697715759, |
| "losses/sft": 1.605255365371704, |
| "losses/total": 0.6928867697715759, |
| "ref_logps/chosen": -36.18606185913086, |
| "ref_logps/rejected": -45.12499237060547, |
| "rewards/accuracies": 0.4765625, |
| "rewards/chosen": -0.00034411592059768736, |
| "rewards/margins": -0.0014411872252821922, |
| "rewards/rejected": 0.0010970717994496226, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 7.498431737676318, |
| "learning_rate": 1.375e-07, |
| "logps/chosen": -38.895912170410156, |
| "logps/rejected": -44.2772216796875, |
| "loss": 0.6922, |
| "losses/dpo": 0.6932664513587952, |
| "losses/sft": 1.4097728729248047, |
| "losses/total": 0.6932664513587952, |
| "ref_logps/chosen": -38.896759033203125, |
| "ref_logps/rejected": -44.25825881958008, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": 8.490856271237135e-05, |
| "rewards/margins": 0.0019812812097370625, |
| "rewards/rejected": -0.001896372647024691, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 7.699329532824058, |
| "learning_rate": 1.5e-07, |
| "logps/chosen": -41.140281677246094, |
| "logps/rejected": -45.357364654541016, |
| "loss": 0.6914, |
| "losses/dpo": 0.6933699250221252, |
| "losses/sft": 1.6783134937286377, |
| "losses/total": 0.6933699250221252, |
| "ref_logps/chosen": -41.16625213623047, |
| "ref_logps/rejected": -45.34600830078125, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.002597447484731674, |
| "rewards/margins": 0.0037334603257477283, |
| "rewards/rejected": -0.0011360126081854105, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 7.69326977444187, |
| "learning_rate": 1.625e-07, |
| "logps/chosen": -40.31540298461914, |
| "logps/rejected": -50.180397033691406, |
| "loss": 0.6925, |
| "losses/dpo": 0.6910371780395508, |
| "losses/sft": 1.366438865661621, |
| "losses/total": 0.6910371780395508, |
| "ref_logps/chosen": -40.30924987792969, |
| "ref_logps/rejected": -50.16073989868164, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": -0.0006153000867925584, |
| "rewards/margins": 0.0013502361252903938, |
| "rewards/rejected": -0.0019655367359519005, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 8.114372207024019, |
| "learning_rate": 1.75e-07, |
| "logps/chosen": -37.29108428955078, |
| "logps/rejected": -44.525848388671875, |
| "loss": 0.6911, |
| "losses/dpo": 0.6899442076683044, |
| "losses/sft": 1.4768216609954834, |
| "losses/total": 0.6899442076683044, |
| "ref_logps/chosen": -37.311187744140625, |
| "ref_logps/rejected": -44.50410079956055, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": 0.0020105522125959396, |
| "rewards/margins": 0.004185608588159084, |
| "rewards/rejected": -0.0021750556770712137, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 7.835230950103234, |
| "learning_rate": 1.875e-07, |
| "logps/chosen": -38.33734893798828, |
| "logps/rejected": -43.93443298339844, |
| "loss": 0.6932, |
| "losses/dpo": 0.6937546133995056, |
| "losses/sft": 1.4263617992401123, |
| "losses/total": 0.6937546133995056, |
| "ref_logps/chosen": -38.31421661376953, |
| "ref_logps/rejected": -43.90996170043945, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -0.0023131906054913998, |
| "rewards/margins": 0.00013422727352008224, |
| "rewards/rejected": -0.002447417937219143, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 7.58949645380321, |
| "learning_rate": 2e-07, |
| "logps/chosen": -40.26841735839844, |
| "logps/rejected": -43.40159225463867, |
| "loss": 0.6934, |
| "losses/dpo": 0.7032474279403687, |
| "losses/sft": 1.5701673030853271, |
| "losses/total": 0.7032474279403687, |
| "ref_logps/chosen": -40.24923324584961, |
| "ref_logps/rejected": -43.38625717163086, |
| "rewards/accuracies": 0.4921875, |
| "rewards/chosen": -0.0019177356734871864, |
| "rewards/margins": -0.0003846373874694109, |
| "rewards/rejected": -0.0015330985188484192, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 7.829467009021115, |
| "learning_rate": 2.1249999999999998e-07, |
| "logps/chosen": -41.63703536987305, |
| "logps/rejected": -46.70919418334961, |
| "loss": 0.6952, |
| "losses/dpo": 0.6850873231887817, |
| "losses/sft": 1.4479947090148926, |
| "losses/total": 0.6850873231887817, |
| "ref_logps/chosen": -41.58295822143555, |
| "ref_logps/rejected": -46.69389343261719, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.005406979937106371, |
| "rewards/margins": -0.003876863745972514, |
| "rewards/rejected": -0.0015301161911338568, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 7.485609028305383, |
| "learning_rate": 2.25e-07, |
| "logps/chosen": -40.4469108581543, |
| "logps/rejected": -44.94635009765625, |
| "loss": 0.6927, |
| "losses/dpo": 0.6907854080200195, |
| "losses/sft": 1.1833800077438354, |
| "losses/total": 0.6907854080200195, |
| "ref_logps/chosen": -40.407257080078125, |
| "ref_logps/rejected": -44.894813537597656, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -0.003965577110648155, |
| "rewards/margins": 0.0011882353574037552, |
| "rewards/rejected": -0.005153812933713198, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 7.404661140565325, |
| "learning_rate": 2.3749999999999998e-07, |
| "logps/chosen": -35.739524841308594, |
| "logps/rejected": -46.330265045166016, |
| "loss": 0.6923, |
| "losses/dpo": 0.6935074329376221, |
| "losses/sft": 1.8608835935592651, |
| "losses/total": 0.6935074329376221, |
| "ref_logps/chosen": -35.708274841308594, |
| "ref_logps/rejected": -46.27949523925781, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -0.003125070594251156, |
| "rewards/margins": 0.001951692276634276, |
| "rewards/rejected": -0.00507676275447011, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 7.514252016049334, |
| "learning_rate": 2.5e-07, |
| "logps/chosen": -40.884029388427734, |
| "logps/rejected": -47.1005859375, |
| "loss": 0.6939, |
| "losses/dpo": 0.6919451355934143, |
| "losses/sft": 1.3290549516677856, |
| "losses/total": 0.6919451355934143, |
| "ref_logps/chosen": -40.82601547241211, |
| "ref_logps/rejected": -47.054588317871094, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.0058018057607114315, |
| "rewards/margins": -0.001202343963086605, |
| "rewards/rejected": -0.004599461797624826, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 11.774765376903847, |
| "learning_rate": 2.625e-07, |
| "logps/chosen": -40.38345718383789, |
| "logps/rejected": -51.66474914550781, |
| "loss": 0.6922, |
| "losses/dpo": 0.6949824690818787, |
| "losses/sft": 1.548210620880127, |
| "losses/total": 0.6949824690818787, |
| "ref_logps/chosen": -40.31751251220703, |
| "ref_logps/rejected": -51.57673645019531, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.006594239268451929, |
| "rewards/margins": 0.002207120880484581, |
| "rewards/rejected": -0.008801360614597797, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 7.542951860866026, |
| "learning_rate": 2.75e-07, |
| "logps/chosen": -38.18585205078125, |
| "logps/rejected": -47.13993835449219, |
| "loss": 0.6924, |
| "losses/dpo": 0.6899946928024292, |
| "losses/sft": 1.1579391956329346, |
| "losses/total": 0.6899946928024292, |
| "ref_logps/chosen": -38.091922760009766, |
| "ref_logps/rejected": -47.029541015625, |
| "rewards/accuracies": 0.4765625, |
| "rewards/chosen": -0.009392979554831982, |
| "rewards/margins": 0.0016468917019665241, |
| "rewards/rejected": -0.011039872653782368, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 7.876385596657873, |
| "learning_rate": 2.8749999999999995e-07, |
| "logps/chosen": -38.30439376831055, |
| "logps/rejected": -49.61843490600586, |
| "loss": 0.6895, |
| "losses/dpo": 0.6851339936256409, |
| "losses/sft": 1.5843318700790405, |
| "losses/total": 0.6851339936256409, |
| "ref_logps/chosen": -38.242801666259766, |
| "ref_logps/rejected": -49.4824104309082, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -0.006159077398478985, |
| "rewards/margins": 0.007443387992680073, |
| "rewards/rejected": -0.013602466322481632, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 7.668314504955077, |
| "learning_rate": 3e-07, |
| "logps/chosen": -40.48552322387695, |
| "logps/rejected": -46.48503494262695, |
| "loss": 0.6895, |
| "losses/dpo": 0.6920894980430603, |
| "losses/sft": 1.4189947843551636, |
| "losses/total": 0.6920894980430603, |
| "ref_logps/chosen": -40.395687103271484, |
| "ref_logps/rejected": -46.31945037841797, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -0.008983338251709938, |
| "rewards/margins": 0.007574939634650946, |
| "rewards/rejected": -0.01655827835202217, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 7.344435357199023, |
| "learning_rate": 3.1249999999999997e-07, |
| "logps/chosen": -39.81501388549805, |
| "logps/rejected": -45.954071044921875, |
| "loss": 0.6914, |
| "losses/dpo": 0.6927582621574402, |
| "losses/sft": 1.6129871606826782, |
| "losses/total": 0.6927582621574402, |
| "ref_logps/chosen": -39.700050354003906, |
| "ref_logps/rejected": -45.80275344848633, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -0.011496355757117271, |
| "rewards/margins": 0.0036351331509649754, |
| "rewards/rejected": -0.01513148844242096, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 7.977738556938539, |
| "learning_rate": 3.25e-07, |
| "logps/chosen": -39.41866683959961, |
| "logps/rejected": -47.94341278076172, |
| "loss": 0.6919, |
| "losses/dpo": 0.6823984384536743, |
| "losses/sft": 1.1218098402023315, |
| "losses/total": 0.6823984384536743, |
| "ref_logps/chosen": -39.244632720947266, |
| "ref_logps/rejected": -47.742191314697266, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.017403149977326393, |
| "rewards/margins": 0.002718748524785042, |
| "rewards/rejected": -0.020121898502111435, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 7.137045291395353, |
| "learning_rate": 3.375e-07, |
| "logps/chosen": -36.09528350830078, |
| "logps/rejected": -43.786441802978516, |
| "loss": 0.6934, |
| "losses/dpo": 0.7064226269721985, |
| "losses/sft": 1.185333251953125, |
| "losses/total": 0.7064226269721985, |
| "ref_logps/chosen": -35.91387939453125, |
| "ref_logps/rejected": -43.607757568359375, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -0.01814102753996849, |
| "rewards/margins": -0.0002719040203373879, |
| "rewards/rejected": -0.017869124189019203, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 7.379899822037054, |
| "learning_rate": 3.5e-07, |
| "logps/chosen": -44.72355270385742, |
| "logps/rejected": -47.34676742553711, |
| "loss": 0.6909, |
| "losses/dpo": 0.6887847185134888, |
| "losses/sft": 1.6178021430969238, |
| "losses/total": 0.6887847185134888, |
| "ref_logps/chosen": -44.496578216552734, |
| "ref_logps/rejected": -47.07260513305664, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.02269744500517845, |
| "rewards/margins": 0.004718274809420109, |
| "rewards/rejected": -0.027415720745921135, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 8.0485668325091, |
| "learning_rate": 3.6249999999999997e-07, |
| "logps/chosen": -41.700931549072266, |
| "logps/rejected": -50.03131103515625, |
| "loss": 0.6893, |
| "losses/dpo": 0.6823500394821167, |
| "losses/sft": 1.4876271486282349, |
| "losses/total": 0.6823500394821167, |
| "ref_logps/chosen": -41.42716979980469, |
| "ref_logps/rejected": -49.678436279296875, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -0.027376368641853333, |
| "rewards/margins": 0.007911860011518002, |
| "rewards/rejected": -0.03528822585940361, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 7.828639871809106, |
| "learning_rate": 3.75e-07, |
| "logps/chosen": -41.79233932495117, |
| "logps/rejected": -48.521629333496094, |
| "loss": 0.6884, |
| "losses/dpo": 0.6861717700958252, |
| "losses/sft": 1.3226033449172974, |
| "losses/total": 0.6861717700958252, |
| "ref_logps/chosen": -41.542991638183594, |
| "ref_logps/rejected": -48.17402648925781, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -0.024935012683272362, |
| "rewards/margins": 0.009825671091675758, |
| "rewards/rejected": -0.03476068750023842, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 8.365578824729102, |
| "learning_rate": 3.875e-07, |
| "logps/chosen": -41.142433166503906, |
| "logps/rejected": -48.93161392211914, |
| "loss": 0.6954, |
| "losses/dpo": 0.6871266961097717, |
| "losses/sft": 1.0435420274734497, |
| "losses/total": 0.6871266961097717, |
| "ref_logps/chosen": -40.73477554321289, |
| "ref_logps/rejected": -48.56393814086914, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": -0.04076562076807022, |
| "rewards/margins": -0.00399819714948535, |
| "rewards/rejected": -0.03676741570234299, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 7.2403699625390585, |
| "learning_rate": 4e-07, |
| "logps/chosen": -37.81801223754883, |
| "logps/rejected": -46.747371673583984, |
| "loss": 0.6899, |
| "losses/dpo": 0.696927011013031, |
| "losses/sft": 1.7572060823440552, |
| "losses/total": 0.696927011013031, |
| "ref_logps/chosen": -37.4744758605957, |
| "ref_logps/rejected": -46.330135345458984, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -0.03435356542468071, |
| "rewards/margins": 0.007370149716734886, |
| "rewards/rejected": -0.04172371327877045, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 7.310694198303384, |
| "learning_rate": 4.1249999999999997e-07, |
| "logps/chosen": -35.633243560791016, |
| "logps/rejected": -41.00613021850586, |
| "loss": 0.6852, |
| "losses/dpo": 0.6834661960601807, |
| "losses/sft": 1.3767448663711548, |
| "losses/total": 0.6834661960601807, |
| "ref_logps/chosen": -35.318153381347656, |
| "ref_logps/rejected": -40.5264892578125, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.03150848299264908, |
| "rewards/margins": 0.016455503180623055, |
| "rewards/rejected": -0.04796398803591728, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 7.510894622614614, |
| "learning_rate": 4.2499999999999995e-07, |
| "logps/chosen": -42.22370529174805, |
| "logps/rejected": -48.45228576660156, |
| "loss": 0.688, |
| "losses/dpo": 0.6784626841545105, |
| "losses/sft": 1.7890020608901978, |
| "losses/total": 0.6784626841545105, |
| "ref_logps/chosen": -41.76036834716797, |
| "ref_logps/rejected": -47.876914978027344, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -0.046334076672792435, |
| "rewards/margins": 0.011203275993466377, |
| "rewards/rejected": -0.05753735080361366, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 7.524260365013066, |
| "learning_rate": 4.375e-07, |
| "logps/chosen": -41.117164611816406, |
| "logps/rejected": -47.30539321899414, |
| "loss": 0.6874, |
| "losses/dpo": 0.6890352368354797, |
| "losses/sft": 1.9127196073532104, |
| "losses/total": 0.6890352368354797, |
| "ref_logps/chosen": -40.611358642578125, |
| "ref_logps/rejected": -46.675148010253906, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -0.0505804568529129, |
| "rewards/margins": 0.01244389358907938, |
| "rewards/rejected": -0.0630243569612503, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 7.835502878067996, |
| "learning_rate": 4.5e-07, |
| "logps/chosen": -42.31553649902344, |
| "logps/rejected": -48.77557373046875, |
| "loss": 0.6846, |
| "losses/dpo": 0.6801737546920776, |
| "losses/sft": 1.0419285297393799, |
| "losses/total": 0.6801737546920776, |
| "ref_logps/chosen": -41.76920700073242, |
| "ref_logps/rejected": -48.04461669921875, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -0.054632995277643204, |
| "rewards/margins": 0.0184622872620821, |
| "rewards/rejected": -0.07309528440237045, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 10.089436620525504, |
| "learning_rate": 4.625e-07, |
| "logps/chosen": -40.23670196533203, |
| "logps/rejected": -44.72167205810547, |
| "loss": 0.6856, |
| "losses/dpo": 0.6820752620697021, |
| "losses/sft": 1.6919959783554077, |
| "losses/total": 0.6820752620697021, |
| "ref_logps/chosen": -39.70112991333008, |
| "ref_logps/rejected": -44.0257682800293, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.05355698987841606, |
| "rewards/margins": 0.016033286228775978, |
| "rewards/rejected": -0.06959027796983719, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 7.4705752998877974, |
| "learning_rate": 4.7499999999999995e-07, |
| "logps/chosen": -40.608951568603516, |
| "logps/rejected": -46.77935791015625, |
| "loss": 0.6846, |
| "losses/dpo": 0.7083909511566162, |
| "losses/sft": 1.3596407175064087, |
| "losses/total": 0.7083909511566162, |
| "ref_logps/chosen": -40.02342987060547, |
| "ref_logps/rejected": -46.008670806884766, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -0.05855226144194603, |
| "rewards/margins": 0.018516112118959427, |
| "rewards/rejected": -0.07706836611032486, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 7.4834962101051445, |
| "learning_rate": 4.875e-07, |
| "logps/chosen": -38.924591064453125, |
| "logps/rejected": -44.15880584716797, |
| "loss": 0.6806, |
| "losses/dpo": 0.6712931394577026, |
| "losses/sft": 1.4741321802139282, |
| "losses/total": 0.6712931394577026, |
| "ref_logps/chosen": -38.32358932495117, |
| "ref_logps/rejected": -43.294708251953125, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.060100097209215164, |
| "rewards/margins": 0.02630985900759697, |
| "rewards/rejected": -0.08640995621681213, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 7.593289226892247, |
| "learning_rate": 5e-07, |
| "logps/chosen": -36.14179611206055, |
| "logps/rejected": -43.69697952270508, |
| "loss": 0.6893, |
| "losses/dpo": 0.6905455589294434, |
| "losses/sft": 1.8340303897857666, |
| "losses/total": 0.6905455589294434, |
| "ref_logps/chosen": -35.46891784667969, |
| "ref_logps/rejected": -42.935638427734375, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -0.06728792935609818, |
| "rewards/margins": 0.008846651762723923, |
| "rewards/rejected": -0.0761345773935318, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 7.184019422431451, |
| "learning_rate": 4.985955056179775e-07, |
| "logps/chosen": -36.04156494140625, |
| "logps/rejected": -44.501773834228516, |
| "loss": 0.6765, |
| "losses/dpo": 0.6632527112960815, |
| "losses/sft": 1.562534213066101, |
| "losses/total": 0.6632527112960815, |
| "ref_logps/chosen": -35.38131332397461, |
| "ref_logps/rejected": -43.47880935668945, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.06602565199136734, |
| "rewards/margins": 0.036271147429943085, |
| "rewards/rejected": -0.10229679197072983, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 7.817004450242549, |
| "learning_rate": 4.97191011235955e-07, |
| "logps/chosen": -40.78254699707031, |
| "logps/rejected": -48.181861877441406, |
| "loss": 0.6803, |
| "losses/dpo": 0.7099467515945435, |
| "losses/sft": 1.8783167600631714, |
| "losses/total": 0.7099467515945435, |
| "ref_logps/chosen": -40.004154205322266, |
| "ref_logps/rejected": -47.115760803222656, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -0.07783940434455872, |
| "rewards/margins": 0.028770849108695984, |
| "rewards/rejected": -0.1066102534532547, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 7.433654746121009, |
| "learning_rate": 4.957865168539325e-07, |
| "logps/chosen": -40.71480941772461, |
| "logps/rejected": -47.88724136352539, |
| "loss": 0.6803, |
| "losses/dpo": 0.7072566151618958, |
| "losses/sft": 1.6432607173919678, |
| "losses/total": 0.7072566151618958, |
| "ref_logps/chosen": -39.739933013916016, |
| "ref_logps/rejected": -46.61668014526367, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -0.09748795628547668, |
| "rewards/margins": 0.029567349702119827, |
| "rewards/rejected": -0.1270553022623062, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 7.64577773243097, |
| "learning_rate": 4.943820224719101e-07, |
| "logps/chosen": -36.590328216552734, |
| "logps/rejected": -45.61329650878906, |
| "loss": 0.6824, |
| "losses/dpo": 0.6843121647834778, |
| "losses/sft": 2.019310235977173, |
| "losses/total": 0.6843121647834778, |
| "ref_logps/chosen": -35.54472732543945, |
| "ref_logps/rejected": -44.32211685180664, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -0.10456002503633499, |
| "rewards/margins": 0.024558255448937416, |
| "rewards/rejected": -0.12911829352378845, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 7.451674659941506, |
| "learning_rate": 4.929775280898877e-07, |
| "logps/chosen": -39.82986068725586, |
| "logps/rejected": -44.68933868408203, |
| "loss": 0.6767, |
| "losses/dpo": 0.6984357833862305, |
| "losses/sft": 1.321048617362976, |
| "losses/total": 0.6984357833862305, |
| "ref_logps/chosen": -38.77911376953125, |
| "ref_logps/rejected": -43.251163482666016, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -0.10507487505674362, |
| "rewards/margins": 0.038742441684007645, |
| "rewards/rejected": -0.14381732046604156, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 7.481580488082584, |
| "learning_rate": 4.915730337078651e-07, |
| "logps/chosen": -40.547637939453125, |
| "logps/rejected": -48.37934112548828, |
| "loss": 0.6784, |
| "losses/dpo": 0.682788610458374, |
| "losses/sft": 1.305440902709961, |
| "losses/total": 0.682788610458374, |
| "ref_logps/chosen": -39.39177703857422, |
| "ref_logps/rejected": -46.888671875, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.11558566987514496, |
| "rewards/margins": 0.03348149359226227, |
| "rewards/rejected": -0.14906716346740723, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 7.503690704863454, |
| "learning_rate": 4.901685393258427e-07, |
| "logps/chosen": -43.58844757080078, |
| "logps/rejected": -46.27735137939453, |
| "loss": 0.6796, |
| "losses/dpo": 0.6725805997848511, |
| "losses/sft": 1.7845996618270874, |
| "losses/total": 0.6725805997848511, |
| "ref_logps/chosen": -42.20465850830078, |
| "ref_logps/rejected": -44.57255554199219, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -0.13837924599647522, |
| "rewards/margins": 0.03210053965449333, |
| "rewards/rejected": -0.17047978937625885, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 7.6005277154002275, |
| "learning_rate": 4.887640449438202e-07, |
| "logps/chosen": -40.19657897949219, |
| "logps/rejected": -46.2965087890625, |
| "loss": 0.6806, |
| "losses/dpo": 0.6915363669395447, |
| "losses/sft": 1.4134502410888672, |
| "losses/total": 0.6915363669395447, |
| "ref_logps/chosen": -38.93235778808594, |
| "ref_logps/rejected": -44.731178283691406, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -0.12642225623130798, |
| "rewards/margins": 0.030110429972410202, |
| "rewards/rejected": -0.15653270483016968, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 7.342034157176263, |
| "learning_rate": 4.873595505617978e-07, |
| "logps/chosen": -35.246055603027344, |
| "logps/rejected": -45.013092041015625, |
| "loss": 0.671, |
| "losses/dpo": 0.6273987293243408, |
| "losses/sft": 1.2200208902359009, |
| "losses/total": 0.6273987293243408, |
| "ref_logps/chosen": -33.989261627197266, |
| "ref_logps/rejected": -43.26504898071289, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -0.12567944824695587, |
| "rewards/margins": 0.04912441223859787, |
| "rewards/rejected": -0.17480388283729553, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 7.768856876362336, |
| "learning_rate": 4.859550561797752e-07, |
| "logps/chosen": -41.647483825683594, |
| "logps/rejected": -49.59557342529297, |
| "loss": 0.6668, |
| "losses/dpo": 0.6410457491874695, |
| "losses/sft": 2.0004844665527344, |
| "losses/total": 0.6410457491874695, |
| "ref_logps/chosen": -40.150108337402344, |
| "ref_logps/rejected": -47.4974365234375, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.1497381180524826, |
| "rewards/margins": 0.06007564440369606, |
| "rewards/rejected": -0.20981375873088837, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 7.482808682633612, |
| "learning_rate": 4.845505617977528e-07, |
| "logps/chosen": -42.28547668457031, |
| "logps/rejected": -46.57417297363281, |
| "loss": 0.6686, |
| "losses/dpo": 0.6821735501289368, |
| "losses/sft": 1.643945336341858, |
| "losses/total": 0.6821735501289368, |
| "ref_logps/chosen": -40.781097412109375, |
| "ref_logps/rejected": -44.47871017456055, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.15043821930885315, |
| "rewards/margins": 0.05910744518041611, |
| "rewards/rejected": -0.20954564213752747, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 7.444552223013138, |
| "learning_rate": 4.831460674157303e-07, |
| "logps/chosen": -38.653770446777344, |
| "logps/rejected": -47.96025848388672, |
| "loss": 0.6696, |
| "losses/dpo": 0.653758704662323, |
| "losses/sft": 1.9075889587402344, |
| "losses/total": 0.653758704662323, |
| "ref_logps/chosen": -37.04439163208008, |
| "ref_logps/rejected": -45.76567840576172, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.16093730926513672, |
| "rewards/margins": 0.05852021649479866, |
| "rewards/rejected": -0.2194575071334839, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 7.051622049892774, |
| "learning_rate": 4.817415730337078e-07, |
| "logps/chosen": -36.511940002441406, |
| "logps/rejected": -42.634193420410156, |
| "loss": 0.672, |
| "losses/dpo": 0.6566299200057983, |
| "losses/sft": 1.6063774824142456, |
| "losses/total": 0.6566299200057983, |
| "ref_logps/chosen": -34.99435806274414, |
| "ref_logps/rejected": -40.612342834472656, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -0.15175840258598328, |
| "rewards/margins": 0.05042674392461777, |
| "rewards/rejected": -0.20218515396118164, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 7.752968590362967, |
| "learning_rate": 4.803370786516854e-07, |
| "logps/chosen": -43.374481201171875, |
| "logps/rejected": -46.1808967590332, |
| "loss": 0.664, |
| "losses/dpo": 0.5915548801422119, |
| "losses/sft": 1.5764846801757812, |
| "losses/total": 0.5915548801422119, |
| "ref_logps/chosen": -41.619239807128906, |
| "ref_logps/rejected": -43.757720947265625, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.17552456259727478, |
| "rewards/margins": 0.06679282337427139, |
| "rewards/rejected": -0.24231737852096558, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 7.375135165111918, |
| "learning_rate": 4.789325842696629e-07, |
| "logps/chosen": -40.775726318359375, |
| "logps/rejected": -45.556365966796875, |
| "loss": 0.6783, |
| "losses/dpo": 0.6768916845321655, |
| "losses/sft": 1.3732706308364868, |
| "losses/total": 0.6768916845321655, |
| "ref_logps/chosen": -39.01034927368164, |
| "ref_logps/rejected": -43.436553955078125, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -0.17653760313987732, |
| "rewards/margins": 0.03544352203607559, |
| "rewards/rejected": -0.21198111772537231, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 7.238231049231885, |
| "learning_rate": 4.775280898876405e-07, |
| "logps/chosen": -39.11316680908203, |
| "logps/rejected": -45.04530334472656, |
| "loss": 0.6642, |
| "losses/dpo": 0.6528148651123047, |
| "losses/sft": 1.159528136253357, |
| "losses/total": 0.6528148651123047, |
| "ref_logps/chosen": -37.2640266418457, |
| "ref_logps/rejected": -42.495338439941406, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -0.18491369485855103, |
| "rewards/margins": 0.07008323073387146, |
| "rewards/rejected": -0.2549969553947449, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 7.652181625277677, |
| "learning_rate": 4.7612359550561797e-07, |
| "logps/chosen": -43.244380950927734, |
| "logps/rejected": -49.415409088134766, |
| "loss": 0.6679, |
| "losses/dpo": 0.6176864504814148, |
| "losses/sft": 1.796196460723877, |
| "losses/total": 0.6176864504814148, |
| "ref_logps/chosen": -41.167381286621094, |
| "ref_logps/rejected": -46.70917892456055, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -0.20770025253295898, |
| "rewards/margins": 0.06292243301868439, |
| "rewards/rejected": -0.2706226706504822, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 8.012193852457372, |
| "learning_rate": 4.747191011235955e-07, |
| "logps/chosen": -38.95054626464844, |
| "logps/rejected": -45.45573043823242, |
| "loss": 0.6545, |
| "losses/dpo": 0.721019983291626, |
| "losses/sft": 1.6278411149978638, |
| "losses/total": 0.721019983291626, |
| "ref_logps/chosen": -37.17967987060547, |
| "ref_logps/rejected": -42.77497100830078, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.17708644270896912, |
| "rewards/margins": 0.09098967909812927, |
| "rewards/rejected": -0.2680761218070984, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 7.745152630727936, |
| "learning_rate": 4.7331460674157303e-07, |
| "logps/chosen": -41.84577560424805, |
| "logps/rejected": -54.23434066772461, |
| "loss": 0.654, |
| "losses/dpo": 0.601816713809967, |
| "losses/sft": 1.5886242389678955, |
| "losses/total": 0.601816713809967, |
| "ref_logps/chosen": -39.931434631347656, |
| "ref_logps/rejected": -51.34343719482422, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.19143418967723846, |
| "rewards/margins": 0.09765592962503433, |
| "rewards/rejected": -0.2890901267528534, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 8.104292528879256, |
| "learning_rate": 4.7191011235955054e-07, |
| "logps/chosen": -40.5402717590332, |
| "logps/rejected": -48.11115264892578, |
| "loss": 0.6612, |
| "losses/dpo": 0.6307883858680725, |
| "losses/sft": 1.6475903987884521, |
| "losses/total": 0.6307883858680725, |
| "ref_logps/chosen": -38.59111022949219, |
| "ref_logps/rejected": -45.39031219482422, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.19491644203662872, |
| "rewards/margins": 0.07716768234968185, |
| "rewards/rejected": -0.27208411693573, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 8.24076971848322, |
| "learning_rate": 4.705056179775281e-07, |
| "logps/chosen": -41.55039978027344, |
| "logps/rejected": -51.43959426879883, |
| "loss": 0.6677, |
| "losses/dpo": 0.7012457251548767, |
| "losses/sft": 2.175475597381592, |
| "losses/total": 0.7012457251548767, |
| "ref_logps/chosen": -39.29528045654297, |
| "ref_logps/rejected": -48.50824737548828, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.22551202774047852, |
| "rewards/margins": 0.06762254983186722, |
| "rewards/rejected": -0.2931345999240875, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 8.35481489409821, |
| "learning_rate": 4.691011235955056e-07, |
| "logps/chosen": -43.0926399230957, |
| "logps/rejected": -47.44728088378906, |
| "loss": 0.6462, |
| "losses/dpo": 0.6053961515426636, |
| "losses/sft": 1.459052324295044, |
| "losses/total": 0.6053961515426636, |
| "ref_logps/chosen": -40.96197509765625, |
| "ref_logps/rejected": -44.2166633605957, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.2130661904811859, |
| "rewards/margins": 0.1099955290555954, |
| "rewards/rejected": -0.3230617344379425, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 7.797774561384425, |
| "learning_rate": 4.6769662921348315e-07, |
| "logps/chosen": -38.055320739746094, |
| "logps/rejected": -47.66813659667969, |
| "loss": 0.652, |
| "losses/dpo": 0.632691502571106, |
| "losses/sft": 1.4375559091567993, |
| "losses/total": 0.632691502571106, |
| "ref_logps/chosen": -36.05341720581055, |
| "ref_logps/rejected": -44.64914321899414, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.20018979907035828, |
| "rewards/margins": 0.10170910507440567, |
| "rewards/rejected": -0.30189892649650574, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 7.28638958987082, |
| "learning_rate": 4.662921348314606e-07, |
| "logps/chosen": -39.81006622314453, |
| "logps/rejected": -46.78810501098633, |
| "loss": 0.662, |
| "losses/dpo": 0.6003807783126831, |
| "losses/sft": 1.3374682664871216, |
| "losses/total": 0.6003807783126831, |
| "ref_logps/chosen": -37.332698822021484, |
| "ref_logps/rejected": -43.507659912109375, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.247736856341362, |
| "rewards/margins": 0.0803074836730957, |
| "rewards/rejected": -0.3280443251132965, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 7.798097069372596, |
| "learning_rate": 4.6488764044943816e-07, |
| "logps/chosen": -45.76461410522461, |
| "logps/rejected": -50.199825286865234, |
| "loss": 0.6814, |
| "losses/dpo": 0.6105685234069824, |
| "losses/sft": 1.8878819942474365, |
| "losses/total": 0.6105685234069824, |
| "ref_logps/chosen": -42.697021484375, |
| "ref_logps/rejected": -46.67133331298828, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -0.30675944685935974, |
| "rewards/margins": 0.046089351177215576, |
| "rewards/rejected": -0.35284876823425293, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 7.825603469401775, |
| "learning_rate": 4.634831460674157e-07, |
| "logps/chosen": -43.35639953613281, |
| "logps/rejected": -53.741355895996094, |
| "loss": 0.6436, |
| "losses/dpo": 0.6293699741363525, |
| "losses/sft": 1.4026882648468018, |
| "losses/total": 0.6293699741363525, |
| "ref_logps/chosen": -40.74187469482422, |
| "ref_logps/rejected": -49.880897521972656, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.2614526152610779, |
| "rewards/margins": 0.12459328025579453, |
| "rewards/rejected": -0.3860458731651306, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 7.635556989764748, |
| "learning_rate": 4.620786516853932e-07, |
| "logps/chosen": -40.68864440917969, |
| "logps/rejected": -46.91835021972656, |
| "loss": 0.6539, |
| "losses/dpo": 0.7432792782783508, |
| "losses/sft": 1.5986056327819824, |
| "losses/total": 0.7432792782783508, |
| "ref_logps/chosen": -38.21084213256836, |
| "ref_logps/rejected": -43.418067932128906, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.24778038263320923, |
| "rewards/margins": 0.10224790126085281, |
| "rewards/rejected": -0.35002827644348145, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 7.6861933707333305, |
| "learning_rate": 4.606741573033708e-07, |
| "logps/chosen": -42.49998092651367, |
| "logps/rejected": -49.80910873413086, |
| "loss": 0.6241, |
| "losses/dpo": 0.5949782133102417, |
| "losses/sft": 1.2951277494430542, |
| "losses/total": 0.5949782133102417, |
| "ref_logps/chosen": -40.09019088745117, |
| "ref_logps/rejected": -45.69980239868164, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.24097900092601776, |
| "rewards/margins": 0.16995173692703247, |
| "rewards/rejected": -0.4109307527542114, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 7.5671933847566555, |
| "learning_rate": 4.592696629213483e-07, |
| "logps/chosen": -42.11725616455078, |
| "logps/rejected": -52.82745361328125, |
| "loss": 0.6438, |
| "losses/dpo": 0.6235805749893188, |
| "losses/sft": 1.4768122434616089, |
| "losses/total": 0.6235805749893188, |
| "ref_logps/chosen": -39.234642028808594, |
| "ref_logps/rejected": -48.657501220703125, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.2882614731788635, |
| "rewards/margins": 0.12873350083827972, |
| "rewards/rejected": -0.41699495911598206, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 7.380869348497186, |
| "learning_rate": 4.5786516853932584e-07, |
| "logps/chosen": -40.33063507080078, |
| "logps/rejected": -47.34225845336914, |
| "loss": 0.6519, |
| "losses/dpo": 0.6829323768615723, |
| "losses/sft": 1.6434234380722046, |
| "losses/total": 0.6829323768615723, |
| "ref_logps/chosen": -37.294776916503906, |
| "ref_logps/rejected": -43.10985565185547, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.30358612537384033, |
| "rewards/margins": 0.11965445429086685, |
| "rewards/rejected": -0.4232405722141266, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 7.505680485493994, |
| "learning_rate": 4.5646067415730334e-07, |
| "logps/chosen": -40.74094009399414, |
| "logps/rejected": -49.552616119384766, |
| "loss": 0.6423, |
| "losses/dpo": 0.6549758315086365, |
| "losses/sft": 1.5461335182189941, |
| "losses/total": 0.6549758315086365, |
| "ref_logps/chosen": -37.58620071411133, |
| "ref_logps/rejected": -45.091209411621094, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -0.3154744505882263, |
| "rewards/margins": 0.1306663304567337, |
| "rewards/rejected": -0.4461407959461212, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 7.805459051678804, |
| "learning_rate": 4.550561797752809e-07, |
| "logps/chosen": -42.94220733642578, |
| "logps/rejected": -54.635963439941406, |
| "loss": 0.6384, |
| "losses/dpo": 0.589752733707428, |
| "losses/sft": 1.55972421169281, |
| "losses/total": 0.589752733707428, |
| "ref_logps/chosen": -39.39100646972656, |
| "ref_logps/rejected": -49.64848709106445, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.35512006282806396, |
| "rewards/margins": 0.1436270773410797, |
| "rewards/rejected": -0.49874716997146606, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 8.048352395094511, |
| "learning_rate": 4.536516853932584e-07, |
| "logps/chosen": -41.640235900878906, |
| "logps/rejected": -53.20794677734375, |
| "loss": 0.621, |
| "losses/dpo": 0.6062641143798828, |
| "losses/sft": 1.0970079898834229, |
| "losses/total": 0.6062641143798828, |
| "ref_logps/chosen": -38.45201873779297, |
| "ref_logps/rejected": -48.13432312011719, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.3188212513923645, |
| "rewards/margins": 0.18854106962680817, |
| "rewards/rejected": -0.5073623061180115, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 12.072882817554818, |
| "learning_rate": 4.522471910112359e-07, |
| "logps/chosen": -43.404823303222656, |
| "logps/rejected": -50.17894744873047, |
| "loss": 0.6527, |
| "losses/dpo": 0.5960186719894409, |
| "losses/sft": 1.444412112236023, |
| "losses/total": 0.5960186719894409, |
| "ref_logps/chosen": -39.74496841430664, |
| "ref_logps/rejected": -45.32371139526367, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.3659852147102356, |
| "rewards/margins": 0.11953801661729813, |
| "rewards/rejected": -0.4855232238769531, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 7.650328731492573, |
| "learning_rate": 4.5084269662921347e-07, |
| "logps/chosen": -43.5896110534668, |
| "logps/rejected": -51.086971282958984, |
| "loss": 0.6409, |
| "losses/dpo": 0.6057982444763184, |
| "losses/sft": 1.4658453464508057, |
| "losses/total": 0.6057982444763184, |
| "ref_logps/chosen": -39.98381423950195, |
| "ref_logps/rejected": -45.86697769165039, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -0.3605796992778778, |
| "rewards/margins": 0.16141945123672485, |
| "rewards/rejected": -0.521999180316925, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 7.945574535694735, |
| "learning_rate": 4.4943820224719097e-07, |
| "logps/chosen": -44.07162094116211, |
| "logps/rejected": -50.66339874267578, |
| "loss": 0.6556, |
| "losses/dpo": 0.6170323491096497, |
| "losses/sft": 1.8739807605743408, |
| "losses/total": 0.6170323491096497, |
| "ref_logps/chosen": -40.06593322753906, |
| "ref_logps/rejected": -45.41100311279297, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -0.4005686044692993, |
| "rewards/margins": 0.12467078864574432, |
| "rewards/rejected": -0.5252394080162048, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 9.606133876539282, |
| "learning_rate": 4.4803370786516853e-07, |
| "logps/chosen": -42.20341110229492, |
| "logps/rejected": -50.49468231201172, |
| "loss": 0.6503, |
| "losses/dpo": 0.586646556854248, |
| "losses/sft": 1.5989439487457275, |
| "losses/total": 0.586646556854248, |
| "ref_logps/chosen": -38.3419075012207, |
| "ref_logps/rejected": -45.38350296020508, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -0.3861507773399353, |
| "rewards/margins": 0.12496703863143921, |
| "rewards/rejected": -0.5111178159713745, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 8.32429224169226, |
| "learning_rate": 4.4662921348314603e-07, |
| "logps/chosen": -42.94104766845703, |
| "logps/rejected": -54.27581787109375, |
| "loss": 0.6335, |
| "losses/dpo": 0.629318356513977, |
| "losses/sft": 1.5925976037979126, |
| "losses/total": 0.629318356513977, |
| "ref_logps/chosen": -38.90857696533203, |
| "ref_logps/rejected": -48.47710037231445, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.40324676036834717, |
| "rewards/margins": 0.1766246110200882, |
| "rewards/rejected": -0.5798712968826294, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 7.898667752271484, |
| "learning_rate": 4.452247191011236e-07, |
| "logps/chosen": -42.00642395019531, |
| "logps/rejected": -51.58437728881836, |
| "loss": 0.6326, |
| "losses/dpo": 0.6813949346542358, |
| "losses/sft": 1.5958709716796875, |
| "losses/total": 0.6813949346542358, |
| "ref_logps/chosen": -38.2513427734375, |
| "ref_logps/rejected": -46.194374084472656, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.3755083978176117, |
| "rewards/margins": 0.16349197924137115, |
| "rewards/rejected": -0.5390004515647888, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 8.4984309671094, |
| "learning_rate": 4.438202247191011e-07, |
| "logps/chosen": -47.29401397705078, |
| "logps/rejected": -54.29883575439453, |
| "loss": 0.642, |
| "losses/dpo": 0.6930491924285889, |
| "losses/sft": 1.8281772136688232, |
| "losses/total": 0.6930491924285889, |
| "ref_logps/chosen": -42.54518127441406, |
| "ref_logps/rejected": -47.80082321166992, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -0.47488299012184143, |
| "rewards/margins": 0.17491832375526428, |
| "rewards/rejected": -0.6498013138771057, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 8.385277776330648, |
| "learning_rate": 4.4241573033707865e-07, |
| "logps/chosen": -48.636085510253906, |
| "logps/rejected": -54.089210510253906, |
| "loss": 0.6105, |
| "losses/dpo": 0.5880630612373352, |
| "losses/sft": 1.647892951965332, |
| "losses/total": 0.5880630612373352, |
| "ref_logps/chosen": -43.907169342041016, |
| "ref_logps/rejected": -47.025569915771484, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.472891628742218, |
| "rewards/margins": 0.2334723323583603, |
| "rewards/rejected": -0.7063639760017395, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 10.98551780733234, |
| "learning_rate": 4.410112359550562e-07, |
| "logps/chosen": -44.03840637207031, |
| "logps/rejected": -52.65192413330078, |
| "loss": 0.622, |
| "losses/dpo": 0.5322688817977905, |
| "losses/sft": 1.9210578203201294, |
| "losses/total": 0.5322688817977905, |
| "ref_logps/chosen": -39.61579132080078, |
| "ref_logps/rejected": -46.18141174316406, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.4422611594200134, |
| "rewards/margins": 0.2047904133796692, |
| "rewards/rejected": -0.6470515727996826, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 7.7041080998180425, |
| "learning_rate": 4.3960674157303366e-07, |
| "logps/chosen": -44.86250305175781, |
| "logps/rejected": -52.133052825927734, |
| "loss": 0.6265, |
| "losses/dpo": 0.5972993969917297, |
| "losses/sft": 1.4703764915466309, |
| "losses/total": 0.5972993969917297, |
| "ref_logps/chosen": -40.2207145690918, |
| "ref_logps/rejected": -45.56148910522461, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.46417874097824097, |
| "rewards/margins": 0.192977637052536, |
| "rewards/rejected": -0.6571563482284546, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 7.893501393776045, |
| "learning_rate": 4.382022471910112e-07, |
| "logps/chosen": -45.97453689575195, |
| "logps/rejected": -55.243316650390625, |
| "loss": 0.6289, |
| "losses/dpo": 0.5659444332122803, |
| "losses/sft": 1.6541783809661865, |
| "losses/total": 0.5659444332122803, |
| "ref_logps/chosen": -40.886837005615234, |
| "ref_logps/rejected": -48.23194885253906, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.5087698698043823, |
| "rewards/margins": 0.19236721098423004, |
| "rewards/rejected": -0.7011370062828064, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 9.509139423826628, |
| "learning_rate": 4.367977528089887e-07, |
| "logps/chosen": -44.12831115722656, |
| "logps/rejected": -54.7608642578125, |
| "loss": 0.6193, |
| "losses/dpo": 0.5432471036911011, |
| "losses/sft": 1.8381226062774658, |
| "losses/total": 0.5432471036911011, |
| "ref_logps/chosen": -39.457069396972656, |
| "ref_logps/rejected": -47.7266731262207, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.46712398529052734, |
| "rewards/margins": 0.23629523813724518, |
| "rewards/rejected": -0.7034192681312561, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 7.887107642093141, |
| "learning_rate": 4.353932584269663e-07, |
| "logps/chosen": -45.145904541015625, |
| "logps/rejected": -55.40123748779297, |
| "loss": 0.6092, |
| "losses/dpo": 0.5780594944953918, |
| "losses/sft": 1.9163440465927124, |
| "losses/total": 0.5780594944953918, |
| "ref_logps/chosen": -40.329139709472656, |
| "ref_logps/rejected": -48.183074951171875, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.4816761910915375, |
| "rewards/margins": 0.24014019966125488, |
| "rewards/rejected": -0.7218164205551147, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 8.39204893477319, |
| "learning_rate": 4.339887640449438e-07, |
| "logps/chosen": -46.145851135253906, |
| "logps/rejected": -53.961036682128906, |
| "loss": 0.6403, |
| "losses/dpo": 0.60181725025177, |
| "losses/sft": 1.625700831413269, |
| "losses/total": 0.60181725025177, |
| "ref_logps/chosen": -40.71726608276367, |
| "ref_logps/rejected": -46.57926940917969, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -0.5428579449653625, |
| "rewards/margins": 0.19531863927841187, |
| "rewards/rejected": -0.7381765842437744, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 8.63628479914341, |
| "learning_rate": 4.3258426966292134e-07, |
| "logps/chosen": -48.23160171508789, |
| "logps/rejected": -53.496604919433594, |
| "loss": 0.6382, |
| "losses/dpo": 0.5790094137191772, |
| "losses/sft": 1.4581667184829712, |
| "losses/total": 0.5790094137191772, |
| "ref_logps/chosen": -42.78254699707031, |
| "ref_logps/rejected": -46.28538131713867, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.5449056029319763, |
| "rewards/margins": 0.1762169450521469, |
| "rewards/rejected": -0.7211225628852844, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 7.682264878764254, |
| "learning_rate": 4.311797752808989e-07, |
| "logps/chosen": -44.9177360534668, |
| "logps/rejected": -57.826866149902344, |
| "loss": 0.5789, |
| "losses/dpo": 0.5269919037818909, |
| "losses/sft": 1.9525985717773438, |
| "losses/total": 0.5269919037818909, |
| "ref_logps/chosen": -39.326385498046875, |
| "ref_logps/rejected": -48.92866134643555, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.5591354370117188, |
| "rewards/margins": 0.33068495988845825, |
| "rewards/rejected": -0.889820396900177, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 9.993714538070773, |
| "learning_rate": 4.297752808988764e-07, |
| "logps/chosen": -47.74348068237305, |
| "logps/rejected": -51.28035354614258, |
| "loss": 0.6687, |
| "losses/dpo": 0.6379462480545044, |
| "losses/sft": 1.61128830909729, |
| "losses/total": 0.6379462480545044, |
| "ref_logps/chosen": -41.72521209716797, |
| "ref_logps/rejected": -44.09041976928711, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.6018266677856445, |
| "rewards/margins": 0.11716663837432861, |
| "rewards/rejected": -0.7189933061599731, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 9.79439579593073, |
| "learning_rate": 4.2837078651685396e-07, |
| "logps/chosen": -46.20167541503906, |
| "logps/rejected": -54.01323318481445, |
| "loss": 0.6199, |
| "losses/dpo": 0.4756242632865906, |
| "losses/sft": 1.6830320358276367, |
| "losses/total": 0.4756242632865906, |
| "ref_logps/chosen": -40.16909408569336, |
| "ref_logps/rejected": -45.69634246826172, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.6032581329345703, |
| "rewards/margins": 0.22843076288700104, |
| "rewards/rejected": -0.8316888809204102, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 8.115443534609298, |
| "learning_rate": 4.269662921348314e-07, |
| "logps/chosen": -49.6710319519043, |
| "logps/rejected": -60.84608840942383, |
| "loss": 0.5948, |
| "losses/dpo": 0.6356014013290405, |
| "losses/sft": 1.7809040546417236, |
| "losses/total": 0.6356014013290405, |
| "ref_logps/chosen": -43.4056282043457, |
| "ref_logps/rejected": -51.69749069213867, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.6265405416488647, |
| "rewards/margins": 0.2883196473121643, |
| "rewards/rejected": -0.914860188961029, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 8.161770613063224, |
| "learning_rate": 4.2556179775280896e-07, |
| "logps/chosen": -44.44715881347656, |
| "logps/rejected": -52.76496124267578, |
| "loss": 0.6499, |
| "losses/dpo": 0.5320106744766235, |
| "losses/sft": 1.5193849802017212, |
| "losses/total": 0.5320106744766235, |
| "ref_logps/chosen": -38.260562896728516, |
| "ref_logps/rejected": -44.902198791503906, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.618659496307373, |
| "rewards/margins": 0.1676165610551834, |
| "rewards/rejected": -0.78627610206604, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 11.63146009410558, |
| "learning_rate": 4.2415730337078647e-07, |
| "logps/chosen": -48.569419860839844, |
| "logps/rejected": -55.14238357543945, |
| "loss": 0.6309, |
| "losses/dpo": 0.5968553423881531, |
| "losses/sft": 1.5720221996307373, |
| "losses/total": 0.5968553423881531, |
| "ref_logps/chosen": -42.18726348876953, |
| "ref_logps/rejected": -46.23759460449219, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.6382158994674683, |
| "rewards/margins": 0.2522626221179962, |
| "rewards/rejected": -0.8904784917831421, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 7.826574210546138, |
| "learning_rate": 4.22752808988764e-07, |
| "logps/chosen": -48.970035552978516, |
| "logps/rejected": -56.224632263183594, |
| "loss": 0.6209, |
| "losses/dpo": 0.566143810749054, |
| "losses/sft": 1.7626792192459106, |
| "losses/total": 0.566143810749054, |
| "ref_logps/chosen": -42.288108825683594, |
| "ref_logps/rejected": -47.209964752197266, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.6681923270225525, |
| "rewards/margins": 0.23327398300170898, |
| "rewards/rejected": -0.9014662504196167, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 7.984390488888068, |
| "learning_rate": 4.2134831460674153e-07, |
| "logps/chosen": -44.690216064453125, |
| "logps/rejected": -57.38431930541992, |
| "loss": 0.5916, |
| "losses/dpo": 0.5278609395027161, |
| "losses/sft": 1.7001551389694214, |
| "losses/total": 0.5278609395027161, |
| "ref_logps/chosen": -39.018001556396484, |
| "ref_logps/rejected": -48.73064422607422, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.567221999168396, |
| "rewards/margins": 0.2981455326080322, |
| "rewards/rejected": -0.8653674125671387, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 8.140376307221391, |
| "learning_rate": 4.199438202247191e-07, |
| "logps/chosen": -45.94129180908203, |
| "logps/rejected": -57.70640563964844, |
| "loss": 0.5843, |
| "losses/dpo": 0.6323425769805908, |
| "losses/sft": 1.9729546308517456, |
| "losses/total": 0.6323425769805908, |
| "ref_logps/chosen": -39.79209899902344, |
| "ref_logps/rejected": -47.78965377807617, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.6149196624755859, |
| "rewards/margins": 0.37675485014915466, |
| "rewards/rejected": -0.991674542427063, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 8.940775697669213, |
| "learning_rate": 4.1853932584269664e-07, |
| "logps/chosen": -49.44526672363281, |
| "logps/rejected": -54.83664321899414, |
| "loss": 0.6672, |
| "losses/dpo": 0.6795445084571838, |
| "losses/sft": 1.7624062299728394, |
| "losses/total": 0.6795445084571838, |
| "ref_logps/chosen": -42.365447998046875, |
| "ref_logps/rejected": -46.211761474609375, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.7079817652702332, |
| "rewards/margins": 0.15450690686702728, |
| "rewards/rejected": -0.8624885678291321, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 7.425234946687916, |
| "learning_rate": 4.1713483146067415e-07, |
| "logps/chosen": -43.5301513671875, |
| "logps/rejected": -52.020164489746094, |
| "loss": 0.5957, |
| "losses/dpo": 0.6246699690818787, |
| "losses/sft": 1.7049494981765747, |
| "losses/total": 0.6246699690818787, |
| "ref_logps/chosen": -37.423370361328125, |
| "ref_logps/rejected": -42.76348876953125, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.6106777191162109, |
| "rewards/margins": 0.31499022245407104, |
| "rewards/rejected": -0.9256680607795715, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 9.179756029554161, |
| "learning_rate": 4.157303370786517e-07, |
| "logps/chosen": -49.9049072265625, |
| "logps/rejected": -51.58677673339844, |
| "loss": 0.6817, |
| "losses/dpo": 0.4553123712539673, |
| "losses/sft": 1.8781102895736694, |
| "losses/total": 0.4553123712539673, |
| "ref_logps/chosen": -42.09438705444336, |
| "ref_logps/rejected": -42.1667594909668, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.7810521721839905, |
| "rewards/margins": 0.16094914078712463, |
| "rewards/rejected": -0.9420013427734375, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 8.48875168411848, |
| "learning_rate": 4.1432584269662915e-07, |
| "logps/chosen": -43.814697265625, |
| "logps/rejected": -56.10200881958008, |
| "loss": 0.5895, |
| "losses/dpo": 0.6046161651611328, |
| "losses/sft": 1.7643048763275146, |
| "losses/total": 0.6046161651611328, |
| "ref_logps/chosen": -37.67271423339844, |
| "ref_logps/rejected": -46.54473114013672, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.6141979694366455, |
| "rewards/margins": 0.3415302038192749, |
| "rewards/rejected": -0.9557281732559204, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 8.913022100408304, |
| "learning_rate": 4.129213483146067e-07, |
| "logps/chosen": -43.33885955810547, |
| "logps/rejected": -49.23841094970703, |
| "loss": 0.6399, |
| "losses/dpo": 0.5780912637710571, |
| "losses/sft": 2.1572117805480957, |
| "losses/total": 0.5780912637710571, |
| "ref_logps/chosen": -37.323486328125, |
| "ref_logps/rejected": -41.119110107421875, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -0.6015373468399048, |
| "rewards/margins": 0.21039217710494995, |
| "rewards/rejected": -0.8119295239448547, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 8.206141020123177, |
| "learning_rate": 4.115168539325842e-07, |
| "logps/chosen": -48.0225830078125, |
| "logps/rejected": -54.95545196533203, |
| "loss": 0.6109, |
| "losses/dpo": 0.5648887753486633, |
| "losses/sft": 1.740958571434021, |
| "losses/total": 0.5648887753486633, |
| "ref_logps/chosen": -41.10425567626953, |
| "ref_logps/rejected": -45.241817474365234, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.6918322443962097, |
| "rewards/margins": 0.27953118085861206, |
| "rewards/rejected": -0.9713634848594666, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 7.999449117068295, |
| "learning_rate": 4.1011235955056177e-07, |
| "logps/chosen": -48.33061981201172, |
| "logps/rejected": -56.14019775390625, |
| "loss": 0.6036, |
| "losses/dpo": 0.5798739194869995, |
| "losses/sft": 1.6369647979736328, |
| "losses/total": 0.5798739194869995, |
| "ref_logps/chosen": -40.94407653808594, |
| "ref_logps/rejected": -45.9286003112793, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.7386540174484253, |
| "rewards/margins": 0.28250569105148315, |
| "rewards/rejected": -1.0211596488952637, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 7.961208032843563, |
| "learning_rate": 4.0870786516853933e-07, |
| "logps/chosen": -43.01828384399414, |
| "logps/rejected": -55.2956657409668, |
| "loss": 0.5814, |
| "losses/dpo": 0.5777114629745483, |
| "losses/sft": 2.0679547786712646, |
| "losses/total": 0.5777114629745483, |
| "ref_logps/chosen": -36.25102233886719, |
| "ref_logps/rejected": -45.063175201416016, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.6767261624336243, |
| "rewards/margins": 0.3465230464935303, |
| "rewards/rejected": -1.0232491493225098, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 8.162855072558866, |
| "learning_rate": 4.0730337078651683e-07, |
| "logps/chosen": -38.68614959716797, |
| "logps/rejected": -51.58649444580078, |
| "loss": 0.6392, |
| "losses/dpo": 0.6045973896980286, |
| "losses/sft": 1.7759897708892822, |
| "losses/total": 0.6045973896980286, |
| "ref_logps/chosen": -32.37638854980469, |
| "ref_logps/rejected": -42.94057083129883, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -0.6309766173362732, |
| "rewards/margins": 0.2336157262325287, |
| "rewards/rejected": -0.8645923733711243, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 10.312576606575677, |
| "learning_rate": 4.058988764044944e-07, |
| "logps/chosen": -50.928245544433594, |
| "logps/rejected": -59.36943054199219, |
| "loss": 0.654, |
| "losses/dpo": 0.735203206539154, |
| "losses/sft": 2.190847158432007, |
| "losses/total": 0.735203206539154, |
| "ref_logps/chosen": -42.11771774291992, |
| "ref_logps/rejected": -48.740692138671875, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -0.8810529112815857, |
| "rewards/margins": 0.18182089924812317, |
| "rewards/rejected": -1.0628738403320312, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 8.648201545330428, |
| "learning_rate": 4.044943820224719e-07, |
| "logps/chosen": -47.14788818359375, |
| "logps/rejected": -54.653385162353516, |
| "loss": 0.63, |
| "losses/dpo": 0.6501352787017822, |
| "losses/sft": 2.0834176540374756, |
| "losses/total": 0.6501352787017822, |
| "ref_logps/chosen": -39.38991928100586, |
| "ref_logps/rejected": -44.46839904785156, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.7757968902587891, |
| "rewards/margins": 0.24270157516002655, |
| "rewards/rejected": -1.0184985399246216, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 9.8578017889453, |
| "learning_rate": 4.0308988764044945e-07, |
| "logps/chosen": -48.94048309326172, |
| "logps/rejected": -56.453369140625, |
| "loss": 0.6293, |
| "losses/dpo": 0.7049952149391174, |
| "losses/sft": 2.280228614807129, |
| "losses/total": 0.7049952149391174, |
| "ref_logps/chosen": -41.544471740722656, |
| "ref_logps/rejected": -46.083526611328125, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.7396014332771301, |
| "rewards/margins": 0.29738324880599976, |
| "rewards/rejected": -1.0369845628738403, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 8.394061989951249, |
| "learning_rate": 4.0168539325842696e-07, |
| "logps/chosen": -45.53207015991211, |
| "logps/rejected": -54.01716613769531, |
| "loss": 0.6652, |
| "losses/dpo": 0.5625556111335754, |
| "losses/sft": 2.00128436088562, |
| "losses/total": 0.5625556111335754, |
| "ref_logps/chosen": -37.465904235839844, |
| "ref_logps/rejected": -44.16294860839844, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.8066164255142212, |
| "rewards/margins": 0.17880576848983765, |
| "rewards/rejected": -0.9854221940040588, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 8.76540952525436, |
| "learning_rate": 4.0028089887640446e-07, |
| "logps/chosen": -46.494407653808594, |
| "logps/rejected": -56.70625686645508, |
| "loss": 0.6355, |
| "losses/dpo": 0.816941499710083, |
| "losses/sft": 2.149186372756958, |
| "losses/total": 0.816941499710083, |
| "ref_logps/chosen": -38.6019401550293, |
| "ref_logps/rejected": -46.42439270019531, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.7892469167709351, |
| "rewards/margins": 0.23893946409225464, |
| "rewards/rejected": -1.028186321258545, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 8.706675493262694, |
| "learning_rate": 3.9887640449438196e-07, |
| "logps/chosen": -48.493709564208984, |
| "logps/rejected": -58.21279525756836, |
| "loss": 0.6039, |
| "losses/dpo": 0.49378710985183716, |
| "losses/sft": 1.3456647396087646, |
| "losses/total": 0.49378710985183716, |
| "ref_logps/chosen": -41.08473205566406, |
| "ref_logps/rejected": -47.72603225708008, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.7408972978591919, |
| "rewards/margins": 0.30777889490127563, |
| "rewards/rejected": -1.0486761331558228, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 8.390537463814029, |
| "learning_rate": 3.974719101123595e-07, |
| "logps/chosen": -45.94505310058594, |
| "logps/rejected": -59.44303894042969, |
| "loss": 0.5863, |
| "losses/dpo": 0.7109102606773376, |
| "losses/sft": 1.734868049621582, |
| "losses/total": 0.7109102606773376, |
| "ref_logps/chosen": -39.46669387817383, |
| "ref_logps/rejected": -49.24740219116211, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.6478357911109924, |
| "rewards/margins": 0.37172842025756836, |
| "rewards/rejected": -1.0195642709732056, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 7.999605487961454, |
| "learning_rate": 3.960674157303371e-07, |
| "logps/chosen": -43.57410430908203, |
| "logps/rejected": -53.34558868408203, |
| "loss": 0.6172, |
| "losses/dpo": 0.5053269863128662, |
| "losses/sft": 1.450685739517212, |
| "losses/total": 0.5053269863128662, |
| "ref_logps/chosen": -36.833072662353516, |
| "ref_logps/rejected": -43.94281768798828, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.6741028428077698, |
| "rewards/margins": 0.26617470383644104, |
| "rewards/rejected": -0.9402774572372437, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 9.415075126299298, |
| "learning_rate": 3.946629213483146e-07, |
| "logps/chosen": -46.565555572509766, |
| "logps/rejected": -57.010372161865234, |
| "loss": 0.5955, |
| "losses/dpo": 0.5628423690795898, |
| "losses/sft": 1.7239865064620972, |
| "losses/total": 0.5628423690795898, |
| "ref_logps/chosen": -39.13492202758789, |
| "ref_logps/rejected": -46.0317497253418, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.743063747882843, |
| "rewards/margins": 0.3547991216182709, |
| "rewards/rejected": -1.097862958908081, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 9.600908058178481, |
| "learning_rate": 3.9325842696629214e-07, |
| "logps/chosen": -50.78204345703125, |
| "logps/rejected": -56.04452896118164, |
| "loss": 0.6852, |
| "losses/dpo": 0.635813295841217, |
| "losses/sft": 1.73310124874115, |
| "losses/total": 0.635813295841217, |
| "ref_logps/chosen": -42.01323318481445, |
| "ref_logps/rejected": -45.830718994140625, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -0.8768815994262695, |
| "rewards/margins": 0.14449933171272278, |
| "rewards/rejected": -1.02138090133667, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 8.373342286084757, |
| "learning_rate": 3.9185393258426964e-07, |
| "logps/chosen": -47.82256317138672, |
| "logps/rejected": -54.956302642822266, |
| "loss": 0.6136, |
| "losses/dpo": 0.6849408149719238, |
| "losses/sft": 2.210094451904297, |
| "losses/total": 0.6849408149719238, |
| "ref_logps/chosen": -40.209930419921875, |
| "ref_logps/rejected": -44.30906295776367, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.7612636089324951, |
| "rewards/margins": 0.3034607172012329, |
| "rewards/rejected": -1.0647242069244385, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 9.637403138990082, |
| "learning_rate": 3.904494382022472e-07, |
| "logps/chosen": -48.02635955810547, |
| "logps/rejected": -57.71215057373047, |
| "loss": 0.5835, |
| "losses/dpo": 0.6933724880218506, |
| "losses/sft": 1.4713902473449707, |
| "losses/total": 0.6933724880218506, |
| "ref_logps/chosen": -40.86788558959961, |
| "ref_logps/rejected": -47.20375442504883, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.7158471345901489, |
| "rewards/margins": 0.33499258756637573, |
| "rewards/rejected": -1.0508397817611694, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 8.531064508491891, |
| "learning_rate": 3.890449438202247e-07, |
| "logps/chosen": -46.2841796875, |
| "logps/rejected": -54.74563980102539, |
| "loss": 0.6334, |
| "losses/dpo": 0.702929675579071, |
| "losses/sft": 1.360397219657898, |
| "losses/total": 0.702929675579071, |
| "ref_logps/chosen": -39.4974365234375, |
| "ref_logps/rejected": -45.235111236572266, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.6786739826202393, |
| "rewards/margins": 0.2723783850669861, |
| "rewards/rejected": -0.9510524272918701, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 41.566258002848386, |
| "learning_rate": 3.876404494382022e-07, |
| "logps/chosen": -44.022640228271484, |
| "logps/rejected": -55.926021575927734, |
| "loss": 0.596, |
| "losses/dpo": 0.5612522959709167, |
| "losses/sft": 1.8725919723510742, |
| "losses/total": 0.5612522959709167, |
| "ref_logps/chosen": -37.003326416015625, |
| "ref_logps/rejected": -45.59555435180664, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.701931357383728, |
| "rewards/margins": 0.3311149477958679, |
| "rewards/rejected": -1.0330464839935303, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 8.107636440875833, |
| "learning_rate": 3.8623595505617977e-07, |
| "logps/chosen": -46.37071990966797, |
| "logps/rejected": -57.88187026977539, |
| "loss": 0.5981, |
| "losses/dpo": 0.5683261156082153, |
| "losses/sft": 1.2367005348205566, |
| "losses/total": 0.5683261156082153, |
| "ref_logps/chosen": -40.31574630737305, |
| "ref_logps/rejected": -48.33371353149414, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.6054975986480713, |
| "rewards/margins": 0.3493175208568573, |
| "rewards/rejected": -0.9548150300979614, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 7.976666948784577, |
| "learning_rate": 3.8483146067415727e-07, |
| "logps/chosen": -49.71910095214844, |
| "logps/rejected": -61.38713073730469, |
| "loss": 0.5628, |
| "losses/dpo": 0.6633545756340027, |
| "losses/sft": 2.0874011516571045, |
| "losses/total": 0.6633545756340027, |
| "ref_logps/chosen": -42.128177642822266, |
| "ref_logps/rejected": -49.5493278503418, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.7590923309326172, |
| "rewards/margins": 0.42468804121017456, |
| "rewards/rejected": -1.1837804317474365, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 8.273120167281697, |
| "learning_rate": 3.834269662921348e-07, |
| "logps/chosen": -44.52275848388672, |
| "logps/rejected": -56.69093322753906, |
| "loss": 0.5973, |
| "losses/dpo": 0.7617586851119995, |
| "losses/sft": 1.4152177572250366, |
| "losses/total": 0.7617586851119995, |
| "ref_logps/chosen": -38.226234436035156, |
| "ref_logps/rejected": -47.10552215576172, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.6296522617340088, |
| "rewards/margins": 0.32888925075531006, |
| "rewards/rejected": -0.9585415124893188, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 8.426019615665346, |
| "learning_rate": 3.8202247191011233e-07, |
| "logps/chosen": -45.17867660522461, |
| "logps/rejected": -52.477989196777344, |
| "loss": 0.5956, |
| "losses/dpo": 0.8184994459152222, |
| "losses/sft": 2.1767871379852295, |
| "losses/total": 0.8184994459152222, |
| "ref_logps/chosen": -39.22052001953125, |
| "ref_logps/rejected": -42.9237060546875, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.5958150625228882, |
| "rewards/margins": 0.35961273312568665, |
| "rewards/rejected": -0.9554278254508972, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 9.445037206868065, |
| "learning_rate": 3.806179775280899e-07, |
| "logps/chosen": -49.57510757446289, |
| "logps/rejected": -57.41535568237305, |
| "loss": 0.626, |
| "losses/dpo": 0.5964499711990356, |
| "losses/sft": 1.574311375617981, |
| "losses/total": 0.5964499711990356, |
| "ref_logps/chosen": -42.10932159423828, |
| "ref_logps/rejected": -47.498348236083984, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.746578574180603, |
| "rewards/margins": 0.24512259662151337, |
| "rewards/rejected": -0.9917011260986328, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 8.142660343260747, |
| "learning_rate": 3.792134831460674e-07, |
| "logps/chosen": -49.39699935913086, |
| "logps/rejected": -60.31464385986328, |
| "loss": 0.5557, |
| "losses/dpo": 0.4981670677661896, |
| "losses/sft": 1.7636176347732544, |
| "losses/total": 0.4981670677661896, |
| "ref_logps/chosen": -42.645172119140625, |
| "ref_logps/rejected": -49.29505157470703, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -0.675183117389679, |
| "rewards/margins": 0.4267764091491699, |
| "rewards/rejected": -1.1019595861434937, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 10.371790207828976, |
| "learning_rate": 3.7780898876404495e-07, |
| "logps/chosen": -49.649539947509766, |
| "logps/rejected": -55.797752380371094, |
| "loss": 0.6236, |
| "losses/dpo": 0.6386290788650513, |
| "losses/sft": 1.9548349380493164, |
| "losses/total": 0.6386290788650513, |
| "ref_logps/chosen": -43.093387603759766, |
| "ref_logps/rejected": -46.55876541137695, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.6556151509284973, |
| "rewards/margins": 0.2682836949825287, |
| "rewards/rejected": -0.9238989353179932, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 8.551344033897433, |
| "learning_rate": 3.7640449438202245e-07, |
| "logps/chosen": -47.41435623168945, |
| "logps/rejected": -53.9222526550293, |
| "loss": 0.6097, |
| "losses/dpo": 0.5466079711914062, |
| "losses/sft": 1.6678849458694458, |
| "losses/total": 0.5466079711914062, |
| "ref_logps/chosen": -40.969482421875, |
| "ref_logps/rejected": -44.308204650878906, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.6444875001907349, |
| "rewards/margins": 0.3169165849685669, |
| "rewards/rejected": -0.9614041447639465, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 8.48986980564796, |
| "learning_rate": 3.75e-07, |
| "logps/chosen": -46.43178176879883, |
| "logps/rejected": -54.83501434326172, |
| "loss": 0.6116, |
| "losses/dpo": 0.7334883213043213, |
| "losses/sft": 2.06964111328125, |
| "losses/total": 0.7334883213043213, |
| "ref_logps/chosen": -39.70704650878906, |
| "ref_logps/rejected": -45.3802490234375, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.6724739074707031, |
| "rewards/margins": 0.2730027139186859, |
| "rewards/rejected": -0.9454765915870667, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 7.931029049263691, |
| "learning_rate": 3.735955056179775e-07, |
| "logps/chosen": -41.7554817199707, |
| "logps/rejected": -51.33645248413086, |
| "loss": 0.6118, |
| "losses/dpo": 0.6140174865722656, |
| "losses/sft": 1.9485254287719727, |
| "losses/total": 0.6140174865722656, |
| "ref_logps/chosen": -36.31658172607422, |
| "ref_logps/rejected": -43.00047302246094, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.5438905954360962, |
| "rewards/margins": 0.28970640897750854, |
| "rewards/rejected": -0.8335970640182495, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 7.961225244135859, |
| "learning_rate": 3.72191011235955e-07, |
| "logps/chosen": -45.26293182373047, |
| "logps/rejected": -53.40849304199219, |
| "loss": 0.5975, |
| "losses/dpo": 0.6238459348678589, |
| "losses/sft": 1.9288058280944824, |
| "losses/total": 0.6238459348678589, |
| "ref_logps/chosen": -39.287940979003906, |
| "ref_logps/rejected": -43.95112991333008, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.597498893737793, |
| "rewards/margins": 0.34823763370513916, |
| "rewards/rejected": -0.9457363486289978, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 8.463248361122965, |
| "learning_rate": 3.707865168539326e-07, |
| "logps/chosen": -46.658084869384766, |
| "logps/rejected": -55.08595275878906, |
| "loss": 0.6124, |
| "losses/dpo": 0.5780600309371948, |
| "losses/sft": 2.4361934661865234, |
| "losses/total": 0.5780600309371948, |
| "ref_logps/chosen": -40.17093276977539, |
| "ref_logps/rejected": -45.625953674316406, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.6487153172492981, |
| "rewards/margins": 0.29728472232818604, |
| "rewards/rejected": -0.9460000395774841, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 7.966461556458486, |
| "learning_rate": 3.693820224719101e-07, |
| "logps/chosen": -43.571929931640625, |
| "logps/rejected": -56.445987701416016, |
| "loss": 0.5996, |
| "losses/dpo": 0.6271636486053467, |
| "losses/sft": 1.7036837339401245, |
| "losses/total": 0.6271636486053467, |
| "ref_logps/chosen": -37.748313903808594, |
| "ref_logps/rejected": -46.95930862426758, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.5823614001274109, |
| "rewards/margins": 0.366305947303772, |
| "rewards/rejected": -0.9486674070358276, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 8.685706334967172, |
| "learning_rate": 3.6797752808988764e-07, |
| "logps/chosen": -45.6722412109375, |
| "logps/rejected": -58.07893371582031, |
| "loss": 0.5569, |
| "losses/dpo": 0.44357961416244507, |
| "losses/sft": 1.9906073808670044, |
| "losses/total": 0.44357961416244507, |
| "ref_logps/chosen": -39.666465759277344, |
| "ref_logps/rejected": -47.67109298706055, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.6005774736404419, |
| "rewards/margins": 0.44020622968673706, |
| "rewards/rejected": -1.0407837629318237, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.02, |
| "grad_norm": 7.5465440969699475, |
| "learning_rate": 3.6657303370786514e-07, |
| "logps/chosen": -44.68495178222656, |
| "logps/rejected": -59.658729553222656, |
| "loss": 0.5418, |
| "losses/dpo": 0.6047704815864563, |
| "losses/sft": 1.8062564134597778, |
| "losses/total": 0.6047704815864563, |
| "ref_logps/chosen": -38.53544616699219, |
| "ref_logps/rejected": -48.757301330566406, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.6149506568908691, |
| "rewards/margins": 0.47519204020500183, |
| "rewards/rejected": -1.0901426076889038, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.03, |
| "grad_norm": 8.040538818716184, |
| "learning_rate": 3.651685393258427e-07, |
| "logps/chosen": -49.753868103027344, |
| "logps/rejected": -55.77690124511719, |
| "loss": 0.5676, |
| "losses/dpo": 0.6332641243934631, |
| "losses/sft": 1.9821722507476807, |
| "losses/total": 0.6332641243934631, |
| "ref_logps/chosen": -43.2289924621582, |
| "ref_logps/rejected": -44.969783782958984, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.6524879932403564, |
| "rewards/margins": 0.4282234311103821, |
| "rewards/rejected": -1.0807113647460938, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.03, |
| "grad_norm": 8.176454529563474, |
| "learning_rate": 3.637640449438202e-07, |
| "logps/chosen": -44.740989685058594, |
| "logps/rejected": -56.03924560546875, |
| "loss": 0.5426, |
| "losses/dpo": 0.5114879608154297, |
| "losses/sft": 1.7858185768127441, |
| "losses/total": 0.5114879608154297, |
| "ref_logps/chosen": -39.14986038208008, |
| "ref_logps/rejected": -45.894187927246094, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.5591133236885071, |
| "rewards/margins": 0.45539283752441406, |
| "rewards/rejected": -1.014506220817566, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 7.378583429433305, |
| "learning_rate": 3.6235955056179776e-07, |
| "logps/chosen": -43.69382095336914, |
| "logps/rejected": -58.01210021972656, |
| "loss": 0.5576, |
| "losses/dpo": 0.5225633978843689, |
| "losses/sft": 1.5681252479553223, |
| "losses/total": 0.5225633978843689, |
| "ref_logps/chosen": -37.7261848449707, |
| "ref_logps/rejected": -47.4239616394043, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.5967639684677124, |
| "rewards/margins": 0.46204984188079834, |
| "rewards/rejected": -1.0588138103485107, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 6.804949706358365, |
| "learning_rate": 3.6095505617977526e-07, |
| "logps/chosen": -41.86065673828125, |
| "logps/rejected": -56.17615509033203, |
| "loss": 0.541, |
| "losses/dpo": 0.5155816674232483, |
| "losses/sft": 1.66976797580719, |
| "losses/total": 0.5155816674232483, |
| "ref_logps/chosen": -36.93909454345703, |
| "ref_logps/rejected": -46.33450698852539, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -0.49215659499168396, |
| "rewards/margins": 0.492008239030838, |
| "rewards/rejected": -0.9841648936271667, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.06, |
| "grad_norm": 9.08939326139503, |
| "learning_rate": 3.5955056179775277e-07, |
| "logps/chosen": -50.215492248535156, |
| "logps/rejected": -61.04364013671875, |
| "loss": 0.4877, |
| "losses/dpo": 0.4505589008331299, |
| "losses/sft": 2.045948028564453, |
| "losses/total": 0.4505589008331299, |
| "ref_logps/chosen": -44.16120147705078, |
| "ref_logps/rejected": -48.857383728027344, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -0.6054291725158691, |
| "rewards/margins": 0.6131964921951294, |
| "rewards/rejected": -1.218625545501709, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.06, |
| "grad_norm": 7.204665546592892, |
| "learning_rate": 3.581460674157303e-07, |
| "logps/chosen": -44.662261962890625, |
| "logps/rejected": -56.869659423828125, |
| "loss": 0.5186, |
| "losses/dpo": 0.4171184301376343, |
| "losses/sft": 1.8761239051818848, |
| "losses/total": 0.4171184301376343, |
| "ref_logps/chosen": -38.65668487548828, |
| "ref_logps/rejected": -45.534423828125, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.6005581617355347, |
| "rewards/margins": 0.5329651832580566, |
| "rewards/rejected": -1.1335232257843018, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.07, |
| "grad_norm": 7.887278895914157, |
| "learning_rate": 3.5674157303370783e-07, |
| "logps/chosen": -42.77589416503906, |
| "logps/rejected": -51.52098083496094, |
| "loss": 0.5407, |
| "losses/dpo": 0.35035043954849243, |
| "losses/sft": 1.5323569774627686, |
| "losses/total": 0.35035043954849243, |
| "ref_logps/chosen": -37.22514343261719, |
| "ref_logps/rejected": -41.38079833984375, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.5550752282142639, |
| "rewards/margins": 0.45894336700439453, |
| "rewards/rejected": -1.0140186548233032, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 7.8842349887311505, |
| "learning_rate": 3.553370786516854e-07, |
| "logps/chosen": -48.38291549682617, |
| "logps/rejected": -60.9052619934082, |
| "loss": 0.5223, |
| "losses/dpo": 0.3222103714942932, |
| "losses/sft": 1.7116018533706665, |
| "losses/total": 0.3222103714942932, |
| "ref_logps/chosen": -41.83588409423828, |
| "ref_logps/rejected": -48.54487228393555, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -0.6547029614448547, |
| "rewards/margins": 0.5813360810279846, |
| "rewards/rejected": -1.2360389232635498, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 8.55836602139343, |
| "learning_rate": 3.539325842696629e-07, |
| "logps/chosen": -43.38884353637695, |
| "logps/rejected": -58.84518051147461, |
| "loss": 0.5957, |
| "losses/dpo": 0.458510160446167, |
| "losses/sft": 1.5300884246826172, |
| "losses/total": 0.458510160446167, |
| "ref_logps/chosen": -36.08906555175781, |
| "ref_logps/rejected": -47.359981536865234, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.7299779653549194, |
| "rewards/margins": 0.41854166984558105, |
| "rewards/rejected": -1.14851975440979, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 7.848515280846644, |
| "learning_rate": 3.5252808988764045e-07, |
| "logps/chosen": -44.214759826660156, |
| "logps/rejected": -55.23337936401367, |
| "loss": 0.5469, |
| "losses/dpo": 0.47486573457717896, |
| "losses/sft": 1.7108842134475708, |
| "losses/total": 0.47486573457717896, |
| "ref_logps/chosen": -38.41852569580078, |
| "ref_logps/rejected": -44.74144744873047, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.5796229243278503, |
| "rewards/margins": 0.4695700407028198, |
| "rewards/rejected": -1.049193024635315, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 7.658781592826533, |
| "learning_rate": 3.51123595505618e-07, |
| "logps/chosen": -44.951683044433594, |
| "logps/rejected": -60.23070526123047, |
| "loss": 0.5266, |
| "losses/dpo": 0.5872490406036377, |
| "losses/sft": 1.8830925226211548, |
| "losses/total": 0.5872490406036377, |
| "ref_logps/chosen": -38.69530487060547, |
| "ref_logps/rejected": -48.41132354736328, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.6256377696990967, |
| "rewards/margins": 0.5562998056411743, |
| "rewards/rejected": -1.1819374561309814, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.11, |
| "grad_norm": 8.54544198888784, |
| "learning_rate": 3.497191011235955e-07, |
| "logps/chosen": -48.944068908691406, |
| "logps/rejected": -63.39553451538086, |
| "loss": 0.533, |
| "losses/dpo": 0.40039166808128357, |
| "losses/sft": 1.862857460975647, |
| "losses/total": 0.40039166808128357, |
| "ref_logps/chosen": -42.56263732910156, |
| "ref_logps/rejected": -50.94089126586914, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.6381431818008423, |
| "rewards/margins": 0.6073207259178162, |
| "rewards/rejected": -1.2454639673233032, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 7.389416730585067, |
| "learning_rate": 3.48314606741573e-07, |
| "logps/chosen": -38.91257858276367, |
| "logps/rejected": -49.71195602416992, |
| "loss": 0.5426, |
| "losses/dpo": 0.46995991468429565, |
| "losses/sft": 1.7020084857940674, |
| "losses/total": 0.46995991468429565, |
| "ref_logps/chosen": -33.769771575927734, |
| "ref_logps/rejected": -40.01531219482422, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.5142804384231567, |
| "rewards/margins": 0.45538395643234253, |
| "rewards/rejected": -0.969664454460144, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 7.674169098254074, |
| "learning_rate": 3.469101123595505e-07, |
| "logps/chosen": -51.57361602783203, |
| "logps/rejected": -61.665931701660156, |
| "loss": 0.4713, |
| "losses/dpo": 0.4245557487010956, |
| "losses/sft": 1.5430463552474976, |
| "losses/total": 0.4245557487010956, |
| "ref_logps/chosen": -44.85792541503906, |
| "ref_logps/rejected": -48.205955505371094, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.6715684533119202, |
| "rewards/margins": 0.6744291186332703, |
| "rewards/rejected": -1.3459975719451904, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.13, |
| "grad_norm": 22.33237134875282, |
| "learning_rate": 3.4550561797752807e-07, |
| "logps/chosen": -47.884517669677734, |
| "logps/rejected": -58.32150650024414, |
| "loss": 0.5577, |
| "losses/dpo": 0.5011469721794128, |
| "losses/sft": 1.6741063594818115, |
| "losses/total": 0.5011469721794128, |
| "ref_logps/chosen": -40.72835159301758, |
| "ref_logps/rejected": -46.43974685668945, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.7156162261962891, |
| "rewards/margins": 0.47256001830101013, |
| "rewards/rejected": -1.1881763935089111, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.14, |
| "grad_norm": 7.494096272004472, |
| "learning_rate": 3.441011235955056e-07, |
| "logps/chosen": -47.11280059814453, |
| "logps/rejected": -60.22679901123047, |
| "loss": 0.4938, |
| "losses/dpo": 0.5828070044517517, |
| "losses/sft": 1.9693294763565063, |
| "losses/total": 0.5828070044517517, |
| "ref_logps/chosen": -41.27467727661133, |
| "ref_logps/rejected": -47.46904754638672, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.5838119983673096, |
| "rewards/margins": 0.691962480545044, |
| "rewards/rejected": -1.2757744789123535, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 7.516820355286503, |
| "learning_rate": 3.4269662921348313e-07, |
| "logps/chosen": -40.18495559692383, |
| "logps/rejected": -54.60972595214844, |
| "loss": 0.5232, |
| "losses/dpo": 0.5568748116493225, |
| "losses/sft": 1.7009055614471436, |
| "losses/total": 0.5568748116493225, |
| "ref_logps/chosen": -33.816017150878906, |
| "ref_logps/rejected": -43.013160705566406, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.6368939280509949, |
| "rewards/margins": 0.5227622389793396, |
| "rewards/rejected": -1.159656047821045, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 7.502946402300403, |
| "learning_rate": 3.4129213483146064e-07, |
| "logps/chosen": -42.238433837890625, |
| "logps/rejected": -53.268348693847656, |
| "loss": 0.543, |
| "losses/dpo": 0.5467118620872498, |
| "losses/sft": 1.2100037336349487, |
| "losses/total": 0.5467118620872498, |
| "ref_logps/chosen": -35.477867126464844, |
| "ref_logps/rejected": -41.79436111450195, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.676056981086731, |
| "rewards/margins": 0.47134220600128174, |
| "rewards/rejected": -1.1473990678787231, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 7.772041407679626, |
| "learning_rate": 3.398876404494382e-07, |
| "logps/chosen": -44.26182174682617, |
| "logps/rejected": -64.4139633178711, |
| "loss": 0.4692, |
| "losses/dpo": 0.4075261354446411, |
| "losses/sft": 1.9415578842163086, |
| "losses/total": 0.4075261354446411, |
| "ref_logps/chosen": -37.64110565185547, |
| "ref_logps/rejected": -50.549678802490234, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.6620715260505676, |
| "rewards/margins": 0.7243567109107971, |
| "rewards/rejected": -1.3864283561706543, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.17, |
| "grad_norm": 9.887168758150576, |
| "learning_rate": 3.3848314606741575e-07, |
| "logps/chosen": -46.856407165527344, |
| "logps/rejected": -60.13237380981445, |
| "loss": 0.5388, |
| "losses/dpo": 0.4830757975578308, |
| "losses/sft": 1.654982328414917, |
| "losses/total": 0.4830757975578308, |
| "ref_logps/chosen": -39.95355987548828, |
| "ref_logps/rejected": -47.57228088378906, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.6902844905853271, |
| "rewards/margins": 0.5657243132591248, |
| "rewards/rejected": -1.2560089826583862, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.18, |
| "grad_norm": 8.0774394495718, |
| "learning_rate": 3.3707865168539325e-07, |
| "logps/chosen": -46.11971664428711, |
| "logps/rejected": -58.2076301574707, |
| "loss": 0.5571, |
| "losses/dpo": 0.6668601036071777, |
| "losses/sft": 2.1451334953308105, |
| "losses/total": 0.6668601036071777, |
| "ref_logps/chosen": -38.718963623046875, |
| "ref_logps/rejected": -45.97993850708008, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.7400756478309631, |
| "rewards/margins": 0.48269355297088623, |
| "rewards/rejected": -1.2227692604064941, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.18, |
| "grad_norm": 7.6995260059404735, |
| "learning_rate": 3.356741573033708e-07, |
| "logps/chosen": -42.61933898925781, |
| "logps/rejected": -53.914058685302734, |
| "loss": 0.5563, |
| "losses/dpo": 0.5214600563049316, |
| "losses/sft": 1.6869933605194092, |
| "losses/total": 0.5214600563049316, |
| "ref_logps/chosen": -35.26099395751953, |
| "ref_logps/rejected": -41.69981002807617, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.7358340620994568, |
| "rewards/margins": 0.4855908751487732, |
| "rewards/rejected": -1.2214250564575195, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.19, |
| "grad_norm": 10.378363778579734, |
| "learning_rate": 3.3426966292134826e-07, |
| "logps/chosen": -46.86024856567383, |
| "logps/rejected": -61.01960754394531, |
| "loss": 0.5047, |
| "losses/dpo": 0.35230398178100586, |
| "losses/sft": 2.1814329624176025, |
| "losses/total": 0.35230398178100586, |
| "ref_logps/chosen": -40.29644775390625, |
| "ref_logps/rejected": -48.00575256347656, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.6563804149627686, |
| "rewards/margins": 0.6450048685073853, |
| "rewards/rejected": -1.3013852834701538, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 8.023005518353788, |
| "learning_rate": 3.328651685393258e-07, |
| "logps/chosen": -48.07395553588867, |
| "logps/rejected": -60.29581069946289, |
| "loss": 0.5284, |
| "losses/dpo": 0.57308030128479, |
| "losses/sft": 2.2541966438293457, |
| "losses/total": 0.57308030128479, |
| "ref_logps/chosen": -39.92966842651367, |
| "ref_logps/rejected": -46.47784423828125, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.8144292831420898, |
| "rewards/margins": 0.5673672556877136, |
| "rewards/rejected": -1.3817965984344482, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.21, |
| "grad_norm": 7.252828799858416, |
| "learning_rate": 3.314606741573033e-07, |
| "logps/chosen": -45.51723861694336, |
| "logps/rejected": -60.294551849365234, |
| "loss": 0.4636, |
| "losses/dpo": 0.581541895866394, |
| "losses/sft": 1.6298561096191406, |
| "losses/total": 0.581541895866394, |
| "ref_logps/chosen": -38.64426040649414, |
| "ref_logps/rejected": -45.62995147705078, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.6872978210449219, |
| "rewards/margins": 0.7791618704795837, |
| "rewards/rejected": -1.4664596319198608, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.22, |
| "grad_norm": 8.57918326492943, |
| "learning_rate": 3.300561797752809e-07, |
| "logps/chosen": -50.444820404052734, |
| "logps/rejected": -56.87160110473633, |
| "loss": 0.5883, |
| "losses/dpo": 0.5911461710929871, |
| "losses/sft": 2.1341657638549805, |
| "losses/total": 0.5911461710929871, |
| "ref_logps/chosen": -41.919921875, |
| "ref_logps/rejected": -43.998451232910156, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.8524903059005737, |
| "rewards/margins": 0.43482455611228943, |
| "rewards/rejected": -1.2873148918151855, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.22, |
| "grad_norm": 8.38374098065899, |
| "learning_rate": 3.2865168539325844e-07, |
| "logps/chosen": -50.57026672363281, |
| "logps/rejected": -61.819053649902344, |
| "loss": 0.5544, |
| "losses/dpo": 0.5535627603530884, |
| "losses/sft": 1.9382483959197998, |
| "losses/total": 0.5535627603530884, |
| "ref_logps/chosen": -41.4891357421875, |
| "ref_logps/rejected": -47.33740234375, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.9081130027770996, |
| "rewards/margins": 0.540052056312561, |
| "rewards/rejected": -1.4481650590896606, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.23, |
| "grad_norm": 8.053746658561026, |
| "learning_rate": 3.2724719101123594e-07, |
| "logps/chosen": -49.71845245361328, |
| "logps/rejected": -55.962120056152344, |
| "loss": 0.5659, |
| "losses/dpo": 0.5589174628257751, |
| "losses/sft": 2.0584986209869385, |
| "losses/total": 0.5589174628257751, |
| "ref_logps/chosen": -41.30320739746094, |
| "ref_logps/rejected": -42.65535354614258, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.841524600982666, |
| "rewards/margins": 0.4891516864299774, |
| "rewards/rejected": -1.3306763172149658, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 8.128166160006492, |
| "learning_rate": 3.258426966292135e-07, |
| "logps/chosen": -49.184532165527344, |
| "logps/rejected": -62.28634262084961, |
| "loss": 0.5594, |
| "losses/dpo": 0.6036563515663147, |
| "losses/sft": 1.7208425998687744, |
| "losses/total": 0.6036563515663147, |
| "ref_logps/chosen": -40.15517044067383, |
| "ref_logps/rejected": -47.867679595947266, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.9029368162155151, |
| "rewards/margins": 0.5389291644096375, |
| "rewards/rejected": -1.441866159439087, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 7.785958027787598, |
| "learning_rate": 3.24438202247191e-07, |
| "logps/chosen": -46.6161003112793, |
| "logps/rejected": -64.03797912597656, |
| "loss": 0.5012, |
| "losses/dpo": 0.36227643489837646, |
| "losses/sft": 1.8801686763763428, |
| "losses/total": 0.36227643489837646, |
| "ref_logps/chosen": -39.068603515625, |
| "ref_logps/rejected": -49.793888092041016, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.7547495365142822, |
| "rewards/margins": 0.6696599125862122, |
| "rewards/rejected": -1.42440927028656, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 8.005747191405192, |
| "learning_rate": 3.2303370786516856e-07, |
| "logps/chosen": -47.46510696411133, |
| "logps/rejected": -64.76780700683594, |
| "loss": 0.495, |
| "losses/dpo": 0.5762285590171814, |
| "losses/sft": 1.6507606506347656, |
| "losses/total": 0.5762285590171814, |
| "ref_logps/chosen": -39.22062683105469, |
| "ref_logps/rejected": -49.37548065185547, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.824447751045227, |
| "rewards/margins": 0.7147842645645142, |
| "rewards/rejected": -1.5392321348190308, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.26, |
| "grad_norm": 8.587266771042302, |
| "learning_rate": 3.21629213483146e-07, |
| "logps/chosen": -44.891380310058594, |
| "logps/rejected": -63.128692626953125, |
| "loss": 0.476, |
| "losses/dpo": 0.3979690968990326, |
| "losses/sft": 1.9852135181427002, |
| "losses/total": 0.3979690968990326, |
| "ref_logps/chosen": -36.76803970336914, |
| "ref_logps/rejected": -47.980194091796875, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.8123339414596558, |
| "rewards/margins": 0.7025157809257507, |
| "rewards/rejected": -1.5148497819900513, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.27, |
| "grad_norm": 8.326932342308945, |
| "learning_rate": 3.2022471910112357e-07, |
| "logps/chosen": -47.12702941894531, |
| "logps/rejected": -63.2083625793457, |
| "loss": 0.4967, |
| "losses/dpo": 0.46893593668937683, |
| "losses/sft": 2.4472603797912598, |
| "losses/total": 0.46893593668937683, |
| "ref_logps/chosen": -38.752357482910156, |
| "ref_logps/rejected": -48.1095085144043, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -0.8374671339988708, |
| "rewards/margins": 0.6724185943603516, |
| "rewards/rejected": -1.5098857879638672, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 8.055720143262379, |
| "learning_rate": 3.1882022471910107e-07, |
| "logps/chosen": -47.89605712890625, |
| "logps/rejected": -61.83879852294922, |
| "loss": 0.5001, |
| "losses/dpo": 0.5356773138046265, |
| "losses/sft": 2.059915781021118, |
| "losses/total": 0.5356773138046265, |
| "ref_logps/chosen": -38.51155090332031, |
| "ref_logps/rejected": -45.76554870605469, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.9384507536888123, |
| "rewards/margins": 0.6688745021820068, |
| "rewards/rejected": -1.6073251962661743, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 7.586041013117629, |
| "learning_rate": 3.1741573033707863e-07, |
| "logps/chosen": -48.82367706298828, |
| "logps/rejected": -61.73891830444336, |
| "loss": 0.482, |
| "losses/dpo": 0.4217451214790344, |
| "losses/sft": 1.6581342220306396, |
| "losses/total": 0.4217451214790344, |
| "ref_logps/chosen": -40.12367248535156, |
| "ref_logps/rejected": -45.60664367675781, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.870000422000885, |
| "rewards/margins": 0.7432273626327515, |
| "rewards/rejected": -1.6132278442382812, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.29, |
| "grad_norm": 8.642028616636065, |
| "learning_rate": 3.160112359550562e-07, |
| "logps/chosen": -48.638423919677734, |
| "logps/rejected": -60.67471694946289, |
| "loss": 0.5046, |
| "losses/dpo": 0.5264810919761658, |
| "losses/sft": 1.7720236778259277, |
| "losses/total": 0.5264810919761658, |
| "ref_logps/chosen": -39.852603912353516, |
| "ref_logps/rejected": -44.74626159667969, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.8785818815231323, |
| "rewards/margins": 0.7142631411552429, |
| "rewards/rejected": -1.592844843864441, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 8.335982070287319, |
| "learning_rate": 3.146067415730337e-07, |
| "logps/chosen": -52.76824951171875, |
| "logps/rejected": -63.022315979003906, |
| "loss": 0.5192, |
| "losses/dpo": 0.4350988268852234, |
| "losses/sft": 2.3970541954040527, |
| "losses/total": 0.4350988268852234, |
| "ref_logps/chosen": -42.989315032958984, |
| "ref_logps/rejected": -47.05906677246094, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.9778933525085449, |
| "rewards/margins": 0.6184311509132385, |
| "rewards/rejected": -1.5963245630264282, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.31, |
| "grad_norm": 8.969238567903124, |
| "learning_rate": 3.1320224719101125e-07, |
| "logps/chosen": -50.0399169921875, |
| "logps/rejected": -61.899139404296875, |
| "loss": 0.5552, |
| "losses/dpo": 0.6638925075531006, |
| "losses/sft": 2.114647150039673, |
| "losses/total": 0.6638925075531006, |
| "ref_logps/chosen": -40.658668518066406, |
| "ref_logps/rejected": -46.74747848510742, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.938124418258667, |
| "rewards/margins": 0.5770419239997864, |
| "rewards/rejected": -1.515166163444519, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.31, |
| "grad_norm": 8.235186141597438, |
| "learning_rate": 3.1179775280898875e-07, |
| "logps/chosen": -52.105690002441406, |
| "logps/rejected": -62.85676574707031, |
| "loss": 0.5092, |
| "losses/dpo": 0.7253843545913696, |
| "losses/sft": 2.424346446990967, |
| "losses/total": 0.7253843545913696, |
| "ref_logps/chosen": -43.18442916870117, |
| "ref_logps/rejected": -47.48811340332031, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.8921262621879578, |
| "rewards/margins": 0.6447390913963318, |
| "rewards/rejected": -1.5368653535842896, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 7.582218244857748, |
| "learning_rate": 3.103932584269663e-07, |
| "logps/chosen": -50.67258071899414, |
| "logps/rejected": -66.15370178222656, |
| "loss": 0.4585, |
| "losses/dpo": 0.45553505420684814, |
| "losses/sft": 1.935417890548706, |
| "losses/total": 0.45553505420684814, |
| "ref_logps/chosen": -41.14632034301758, |
| "ref_logps/rejected": -48.69713592529297, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.9526264667510986, |
| "rewards/margins": 0.7930303812026978, |
| "rewards/rejected": -1.745656967163086, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.33, |
| "grad_norm": 8.60397689359237, |
| "learning_rate": 3.0898876404494376e-07, |
| "logps/chosen": -48.78178405761719, |
| "logps/rejected": -56.637123107910156, |
| "loss": 0.5424, |
| "losses/dpo": 0.46894580125808716, |
| "losses/sft": 1.4491811990737915, |
| "losses/total": 0.46894580125808716, |
| "ref_logps/chosen": -39.89250183105469, |
| "ref_logps/rejected": -41.9540901184082, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.8889281749725342, |
| "rewards/margins": 0.5793753862380981, |
| "rewards/rejected": -1.4683035612106323, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.34, |
| "grad_norm": 8.889595865380464, |
| "learning_rate": 3.075842696629213e-07, |
| "logps/chosen": -54.22248077392578, |
| "logps/rejected": -62.7822380065918, |
| "loss": 0.5381, |
| "losses/dpo": 0.5002489686012268, |
| "losses/sft": 1.6078195571899414, |
| "losses/total": 0.5002489686012268, |
| "ref_logps/chosen": -43.68180847167969, |
| "ref_logps/rejected": -46.31658172607422, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -1.0540671348571777, |
| "rewards/margins": 0.592498779296875, |
| "rewards/rejected": -1.6465659141540527, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.34, |
| "grad_norm": 8.532472841711035, |
| "learning_rate": 3.0617977528089887e-07, |
| "logps/chosen": -49.95827865600586, |
| "logps/rejected": -61.806705474853516, |
| "loss": 0.5407, |
| "losses/dpo": 0.6499188542366028, |
| "losses/sft": 2.202641725540161, |
| "losses/total": 0.6499188542366028, |
| "ref_logps/chosen": -39.29413986206055, |
| "ref_logps/rejected": -44.9674072265625, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -1.0664141178131104, |
| "rewards/margins": 0.6175155639648438, |
| "rewards/rejected": -1.6839298009872437, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 8.909287859284307, |
| "learning_rate": 3.047752808988764e-07, |
| "logps/chosen": -51.441497802734375, |
| "logps/rejected": -62.11806106567383, |
| "loss": 0.527, |
| "losses/dpo": 0.2888008952140808, |
| "losses/sft": 1.8648221492767334, |
| "losses/total": 0.2888008952140808, |
| "ref_logps/chosen": -41.599273681640625, |
| "ref_logps/rejected": -46.178306579589844, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.9842216372489929, |
| "rewards/margins": 0.6097543239593506, |
| "rewards/rejected": -1.5939760208129883, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.36, |
| "grad_norm": 8.118331116269069, |
| "learning_rate": 3.0337078651685393e-07, |
| "logps/chosen": -50.35320281982422, |
| "logps/rejected": -68.11038208007812, |
| "loss": 0.4825, |
| "losses/dpo": 0.2915097177028656, |
| "losses/sft": 2.2731690406799316, |
| "losses/total": 0.2915097177028656, |
| "ref_logps/chosen": -40.37377166748047, |
| "ref_logps/rejected": -50.12421798706055, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.997943103313446, |
| "rewards/margins": 0.8006736636161804, |
| "rewards/rejected": -1.7986167669296265, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.37, |
| "grad_norm": 7.611558494319546, |
| "learning_rate": 3.0196629213483144e-07, |
| "logps/chosen": -48.98621368408203, |
| "logps/rejected": -63.958377838134766, |
| "loss": 0.4771, |
| "losses/dpo": 0.8142632246017456, |
| "losses/sft": 2.000248670578003, |
| "losses/total": 0.8142632246017456, |
| "ref_logps/chosen": -39.60884475708008, |
| "ref_logps/rejected": -46.56087875366211, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.9377367496490479, |
| "rewards/margins": 0.8020133376121521, |
| "rewards/rejected": -1.7397500276565552, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.37, |
| "grad_norm": 7.87112340864733, |
| "learning_rate": 3.00561797752809e-07, |
| "logps/chosen": -50.70545959472656, |
| "logps/rejected": -60.79212951660156, |
| "loss": 0.481, |
| "losses/dpo": 0.4779345989227295, |
| "losses/sft": 2.002454996109009, |
| "losses/total": 0.4779345989227295, |
| "ref_logps/chosen": -40.73643112182617, |
| "ref_logps/rejected": -43.39889144897461, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -0.9969026446342468, |
| "rewards/margins": 0.7424213886260986, |
| "rewards/rejected": -1.7393240928649902, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.38, |
| "grad_norm": 8.88113336772084, |
| "learning_rate": 2.991573033707865e-07, |
| "logps/chosen": -49.58301544189453, |
| "logps/rejected": -58.122802734375, |
| "loss": 0.5481, |
| "losses/dpo": 0.5509602427482605, |
| "losses/sft": 2.1736414432525635, |
| "losses/total": 0.5509602427482605, |
| "ref_logps/chosen": -39.8292350769043, |
| "ref_logps/rejected": -42.636268615722656, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.9753779768943787, |
| "rewards/margins": 0.5732761025428772, |
| "rewards/rejected": -1.5486540794372559, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.39, |
| "grad_norm": 8.086846655989897, |
| "learning_rate": 2.9775280898876406e-07, |
| "logps/chosen": -49.57462692260742, |
| "logps/rejected": -59.01618194580078, |
| "loss": 0.5191, |
| "losses/dpo": 0.41641122102737427, |
| "losses/sft": 1.5094877481460571, |
| "losses/total": 0.41641122102737427, |
| "ref_logps/chosen": -40.067962646484375, |
| "ref_logps/rejected": -43.40235900878906, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.9506663084030151, |
| "rewards/margins": 0.6107163429260254, |
| "rewards/rejected": -1.561382532119751, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 14.80695955125612, |
| "learning_rate": 2.9634831460674156e-07, |
| "logps/chosen": -49.670318603515625, |
| "logps/rejected": -63.068153381347656, |
| "loss": 0.4798, |
| "losses/dpo": 0.71369868516922, |
| "losses/sft": 2.1753716468811035, |
| "losses/total": 0.71369868516922, |
| "ref_logps/chosen": -40.42032241821289, |
| "ref_logps/rejected": -45.94530487060547, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -0.9249992966651917, |
| "rewards/margins": 0.7872861623764038, |
| "rewards/rejected": -1.7122855186462402, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 7.764172479016813, |
| "learning_rate": 2.9494382022471906e-07, |
| "logps/chosen": -46.467315673828125, |
| "logps/rejected": -61.956661224365234, |
| "loss": 0.4781, |
| "losses/dpo": 0.6086141467094421, |
| "losses/sft": 1.7402431964874268, |
| "losses/total": 0.6086141467094421, |
| "ref_logps/chosen": -37.99009704589844, |
| "ref_logps/rejected": -45.549415588378906, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.8477218747138977, |
| "rewards/margins": 0.7930029630661011, |
| "rewards/rejected": -1.640724778175354, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.41, |
| "grad_norm": 8.22953072067457, |
| "learning_rate": 2.935393258426966e-07, |
| "logps/chosen": -50.86983871459961, |
| "logps/rejected": -62.61697769165039, |
| "loss": 0.5075, |
| "losses/dpo": 0.4520256519317627, |
| "losses/sft": 2.4947710037231445, |
| "losses/total": 0.4520256519317627, |
| "ref_logps/chosen": -40.587074279785156, |
| "ref_logps/rejected": -45.32030487060547, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -1.0282765626907349, |
| "rewards/margins": 0.7013900279998779, |
| "rewards/rejected": -1.7296665906906128, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.42, |
| "grad_norm": 7.9279440455740735, |
| "learning_rate": 2.921348314606741e-07, |
| "logps/chosen": -44.96842956542969, |
| "logps/rejected": -62.975059509277344, |
| "loss": 0.5187, |
| "losses/dpo": 0.5396404266357422, |
| "losses/sft": 2.048635482788086, |
| "losses/total": 0.5396404266357422, |
| "ref_logps/chosen": -34.73614501953125, |
| "ref_logps/rejected": -46.000450134277344, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -1.023228406906128, |
| "rewards/margins": 0.6742324829101562, |
| "rewards/rejected": -1.6974608898162842, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.43, |
| "grad_norm": 7.307548456693543, |
| "learning_rate": 2.907303370786517e-07, |
| "logps/chosen": -45.52065658569336, |
| "logps/rejected": -59.8885498046875, |
| "loss": 0.4755, |
| "losses/dpo": 0.40278539061546326, |
| "losses/sft": 2.2951760292053223, |
| "losses/total": 0.40278539061546326, |
| "ref_logps/chosen": -37.46648406982422, |
| "ref_logps/rejected": -44.16557312011719, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.8054174184799194, |
| "rewards/margins": 0.7668801546096802, |
| "rewards/rejected": -1.5722976922988892, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.43, |
| "grad_norm": 8.912502235874074, |
| "learning_rate": 2.893258426966292e-07, |
| "logps/chosen": -45.76905059814453, |
| "logps/rejected": -57.87416076660156, |
| "loss": 0.5655, |
| "losses/dpo": 0.5564082860946655, |
| "losses/sft": 1.3282924890518188, |
| "losses/total": 0.5564082860946655, |
| "ref_logps/chosen": -36.90949630737305, |
| "ref_logps/rejected": -43.09407424926758, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.8859553933143616, |
| "rewards/margins": 0.5920534133911133, |
| "rewards/rejected": -1.47800874710083, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 7.963453038939498, |
| "learning_rate": 2.8792134831460674e-07, |
| "logps/chosen": -48.343929290771484, |
| "logps/rejected": -68.3438720703125, |
| "loss": 0.4613, |
| "losses/dpo": 0.4805658161640167, |
| "losses/sft": 2.2470126152038574, |
| "losses/total": 0.4805658161640167, |
| "ref_logps/chosen": -40.115596771240234, |
| "ref_logps/rejected": -51.82239532470703, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.8228334784507751, |
| "rewards/margins": 0.8293145895004272, |
| "rewards/rejected": -1.6521480083465576, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 8.622412292262098, |
| "learning_rate": 2.8651685393258425e-07, |
| "logps/chosen": -51.2635383605957, |
| "logps/rejected": -63.78586959838867, |
| "loss": 0.4851, |
| "losses/dpo": 0.38889288902282715, |
| "losses/sft": 2.7701683044433594, |
| "losses/total": 0.38889288902282715, |
| "ref_logps/chosen": -42.14338684082031, |
| "ref_logps/rejected": -46.94654083251953, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.9120146036148071, |
| "rewards/margins": 0.7719184756278992, |
| "rewards/rejected": -1.683933138847351, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.46, |
| "grad_norm": 7.759600823991896, |
| "learning_rate": 2.851123595505618e-07, |
| "logps/chosen": -46.17103958129883, |
| "logps/rejected": -60.57073974609375, |
| "loss": 0.5056, |
| "losses/dpo": 0.6096426248550415, |
| "losses/sft": 2.101557493209839, |
| "losses/total": 0.6096426248550415, |
| "ref_logps/chosen": -37.342018127441406, |
| "ref_logps/rejected": -44.97611999511719, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.8829020261764526, |
| "rewards/margins": 0.6765601634979248, |
| "rewards/rejected": -1.559462308883667, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.46, |
| "grad_norm": 8.401972111307101, |
| "learning_rate": 2.8370786516853936e-07, |
| "logps/chosen": -49.945350646972656, |
| "logps/rejected": -64.91703033447266, |
| "loss": 0.4635, |
| "losses/dpo": 0.5600602626800537, |
| "losses/sft": 1.5397871732711792, |
| "losses/total": 0.5600602626800537, |
| "ref_logps/chosen": -41.32893371582031, |
| "ref_logps/rejected": -47.677955627441406, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -0.8616417050361633, |
| "rewards/margins": 0.8622665405273438, |
| "rewards/rejected": -1.7239081859588623, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.47, |
| "grad_norm": 8.847895232096153, |
| "learning_rate": 2.823033707865168e-07, |
| "logps/chosen": -45.08359146118164, |
| "logps/rejected": -65.03630828857422, |
| "loss": 0.5214, |
| "losses/dpo": 0.5835427045822144, |
| "losses/sft": 1.4693069458007812, |
| "losses/total": 0.5835427045822144, |
| "ref_logps/chosen": -35.359466552734375, |
| "ref_logps/rejected": -47.392391204833984, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.9724127650260925, |
| "rewards/margins": 0.7919799089431763, |
| "rewards/rejected": -1.764392614364624, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 8.759081047535012, |
| "learning_rate": 2.8089887640449437e-07, |
| "logps/chosen": -45.32444763183594, |
| "logps/rejected": -63.420326232910156, |
| "loss": 0.5066, |
| "losses/dpo": 0.38791435956954956, |
| "losses/sft": 1.907132625579834, |
| "losses/total": 0.38791435956954956, |
| "ref_logps/chosen": -35.994544982910156, |
| "ref_logps/rejected": -46.32176971435547, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.932990550994873, |
| "rewards/margins": 0.776865541934967, |
| "rewards/rejected": -1.7098561525344849, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.49, |
| "grad_norm": 8.54168900663033, |
| "learning_rate": 2.794943820224719e-07, |
| "logps/chosen": -53.67341613769531, |
| "logps/rejected": -73.58582305908203, |
| "loss": 0.4837, |
| "losses/dpo": 0.5046126842498779, |
| "losses/sft": 2.1525635719299316, |
| "losses/total": 0.5046126842498779, |
| "ref_logps/chosen": -42.92031478881836, |
| "ref_logps/rejected": -54.27576446533203, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.0753098726272583, |
| "rewards/margins": 0.8556962013244629, |
| "rewards/rejected": -1.9310060739517212, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.49, |
| "grad_norm": 9.360560561178314, |
| "learning_rate": 2.7808988764044943e-07, |
| "logps/chosen": -49.29429626464844, |
| "logps/rejected": -58.852745056152344, |
| "loss": 0.5219, |
| "losses/dpo": 0.6207550168037415, |
| "losses/sft": 1.4964567422866821, |
| "losses/total": 0.6207550168037415, |
| "ref_logps/chosen": -39.879859924316406, |
| "ref_logps/rejected": -42.66207504272461, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.9414433836936951, |
| "rewards/margins": 0.6776232719421387, |
| "rewards/rejected": -1.6190667152404785, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 7.766094681277619, |
| "learning_rate": 2.7668539325842694e-07, |
| "logps/chosen": -44.67707443237305, |
| "logps/rejected": -62.46302032470703, |
| "loss": 0.4513, |
| "losses/dpo": 0.3586122393608093, |
| "losses/sft": 1.7712942361831665, |
| "losses/total": 0.3586122393608093, |
| "ref_logps/chosen": -35.756446838378906, |
| "ref_logps/rejected": -45.0746955871582, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.8920624256134033, |
| "rewards/margins": 0.8467705249786377, |
| "rewards/rejected": -1.738832950592041, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.51, |
| "grad_norm": 8.387391054319354, |
| "learning_rate": 2.752808988764045e-07, |
| "logps/chosen": -51.25703430175781, |
| "logps/rejected": -66.00263977050781, |
| "loss": 0.4694, |
| "losses/dpo": 0.4337347447872162, |
| "losses/sft": 2.2087419033050537, |
| "losses/total": 0.4337347447872162, |
| "ref_logps/chosen": -41.61771011352539, |
| "ref_logps/rejected": -48.16603469848633, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.96393221616745, |
| "rewards/margins": 0.8197280764579773, |
| "rewards/rejected": -1.7836604118347168, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 8.762356246957351, |
| "learning_rate": 2.73876404494382e-07, |
| "logps/chosen": -48.135372161865234, |
| "logps/rejected": -59.56908416748047, |
| "loss": 0.5349, |
| "losses/dpo": 0.5300882458686829, |
| "losses/sft": 1.96072256565094, |
| "losses/total": 0.5300882458686829, |
| "ref_logps/chosen": -39.096519470214844, |
| "ref_logps/rejected": -44.62074279785156, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.9038856625556946, |
| "rewards/margins": 0.5909484028816223, |
| "rewards/rejected": -1.4948341846466064, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 8.01612481161353, |
| "learning_rate": 2.7247191011235955e-07, |
| "logps/chosen": -48.03499984741211, |
| "logps/rejected": -58.95391845703125, |
| "loss": 0.4596, |
| "losses/dpo": 0.5687890648841858, |
| "losses/sft": 1.5117721557617188, |
| "losses/total": 0.5687890648841858, |
| "ref_logps/chosen": -40.076637268066406, |
| "ref_logps/rejected": -42.60528564453125, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -0.795836865901947, |
| "rewards/margins": 0.839026153087616, |
| "rewards/rejected": -1.634863018989563, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.53, |
| "grad_norm": 8.170779294474082, |
| "learning_rate": 2.710674157303371e-07, |
| "logps/chosen": -53.97732925415039, |
| "logps/rejected": -66.41366577148438, |
| "loss": 0.4697, |
| "losses/dpo": 0.440776526927948, |
| "losses/sft": 2.09440279006958, |
| "losses/total": 0.440776526927948, |
| "ref_logps/chosen": -44.04298782348633, |
| "ref_logps/rejected": -48.34318161010742, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -0.9934341311454773, |
| "rewards/margins": 0.8136138319969177, |
| "rewards/rejected": -1.8070482015609741, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.54, |
| "grad_norm": 8.062617165532144, |
| "learning_rate": 2.6966292134831456e-07, |
| "logps/chosen": -51.31819152832031, |
| "logps/rejected": -66.41058349609375, |
| "loss": 0.4809, |
| "losses/dpo": 0.5263036489486694, |
| "losses/sft": 1.6697288751602173, |
| "losses/total": 0.5263036489486694, |
| "ref_logps/chosen": -41.211219787597656, |
| "ref_logps/rejected": -48.88929748535156, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.0106972455978394, |
| "rewards/margins": 0.7414315938949585, |
| "rewards/rejected": -1.7521288394927979, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 8.232167547461342, |
| "learning_rate": 2.682584269662921e-07, |
| "logps/chosen": -50.255958557128906, |
| "logps/rejected": -64.30512237548828, |
| "loss": 0.4906, |
| "losses/dpo": 0.5959848761558533, |
| "losses/sft": 2.509490966796875, |
| "losses/total": 0.5959848761558533, |
| "ref_logps/chosen": -40.21955490112305, |
| "ref_logps/rejected": -46.348358154296875, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -1.0036402940750122, |
| "rewards/margins": 0.7920358180999756, |
| "rewards/rejected": -1.7956762313842773, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 8.658885149622282, |
| "learning_rate": 2.668539325842696e-07, |
| "logps/chosen": -48.23283004760742, |
| "logps/rejected": -63.8531608581543, |
| "loss": 0.5012, |
| "losses/dpo": 0.3958526849746704, |
| "losses/sft": 1.8156052827835083, |
| "losses/total": 0.3958526849746704, |
| "ref_logps/chosen": -39.19361877441406, |
| "ref_logps/rejected": -46.780818939208984, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.9039209485054016, |
| "rewards/margins": 0.8033130168914795, |
| "rewards/rejected": -1.7072339057922363, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 9.062222930248339, |
| "learning_rate": 2.654494382022472e-07, |
| "logps/chosen": -48.18266296386719, |
| "logps/rejected": -60.7551155090332, |
| "loss": 0.5163, |
| "losses/dpo": 0.5344985127449036, |
| "losses/sft": 1.5162503719329834, |
| "losses/total": 0.5344985127449036, |
| "ref_logps/chosen": -38.624839782714844, |
| "ref_logps/rejected": -43.208770751953125, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.9557825326919556, |
| "rewards/margins": 0.798852264881134, |
| "rewards/rejected": -1.7546348571777344, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.57, |
| "grad_norm": 8.337997292070591, |
| "learning_rate": 2.640449438202247e-07, |
| "logps/chosen": -46.034690856933594, |
| "logps/rejected": -58.21810531616211, |
| "loss": 0.5159, |
| "losses/dpo": 0.5175392627716064, |
| "losses/sft": 1.6583523750305176, |
| "losses/total": 0.5175392627716064, |
| "ref_logps/chosen": -37.22336959838867, |
| "ref_logps/rejected": -42.30153274536133, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.8811318278312683, |
| "rewards/margins": 0.7105254530906677, |
| "rewards/rejected": -1.5916571617126465, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.58, |
| "grad_norm": 8.605777081243177, |
| "learning_rate": 2.6264044943820224e-07, |
| "logps/chosen": -49.07252502441406, |
| "logps/rejected": -62.991764068603516, |
| "loss": 0.5234, |
| "losses/dpo": 0.3841923177242279, |
| "losses/sft": 2.0660533905029297, |
| "losses/total": 0.3841923177242279, |
| "ref_logps/chosen": -38.91202163696289, |
| "ref_logps/rejected": -45.201744079589844, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -1.0160505771636963, |
| "rewards/margins": 0.7629517316818237, |
| "rewards/rejected": -1.7790021896362305, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.58, |
| "grad_norm": 8.594902163030543, |
| "learning_rate": 2.612359550561798e-07, |
| "logps/chosen": -48.94342803955078, |
| "logps/rejected": -67.45938110351562, |
| "loss": 0.4628, |
| "losses/dpo": 0.5517236590385437, |
| "losses/sft": 1.831754446029663, |
| "losses/total": 0.5517236590385437, |
| "ref_logps/chosen": -39.35862731933594, |
| "ref_logps/rejected": -48.98806381225586, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.9584797620773315, |
| "rewards/margins": 0.8886520862579346, |
| "rewards/rejected": -1.8471317291259766, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.59, |
| "grad_norm": 9.393987371477554, |
| "learning_rate": 2.598314606741573e-07, |
| "logps/chosen": -51.1754035949707, |
| "logps/rejected": -62.90049362182617, |
| "loss": 0.538, |
| "losses/dpo": 0.4337689280509949, |
| "losses/sft": 1.871553897857666, |
| "losses/total": 0.4337689280509949, |
| "ref_logps/chosen": -40.097930908203125, |
| "ref_logps/rejected": -44.89445877075195, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -1.1077474355697632, |
| "rewards/margins": 0.6928560137748718, |
| "rewards/rejected": -1.8006032705307007, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 8.268094387246107, |
| "learning_rate": 2.5842696629213486e-07, |
| "logps/chosen": -46.02891159057617, |
| "logps/rejected": -58.77110290527344, |
| "loss": 0.5183, |
| "losses/dpo": 0.3652556836605072, |
| "losses/sft": 2.1132707595825195, |
| "losses/total": 0.3652556836605072, |
| "ref_logps/chosen": -37.71674346923828, |
| "ref_logps/rejected": -43.298362731933594, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.8312174677848816, |
| "rewards/margins": 0.7160569429397583, |
| "rewards/rejected": -1.5472744703292847, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.61, |
| "grad_norm": 8.156887652455024, |
| "learning_rate": 2.5702247191011236e-07, |
| "logps/chosen": -53.53913879394531, |
| "logps/rejected": -65.96989440917969, |
| "loss": 0.454, |
| "losses/dpo": 0.6029412746429443, |
| "losses/sft": 2.5100276470184326, |
| "losses/total": 0.6029412746429443, |
| "ref_logps/chosen": -43.845726013183594, |
| "ref_logps/rejected": -47.28349304199219, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.9693412780761719, |
| "rewards/margins": 0.899299681186676, |
| "rewards/rejected": -1.8686408996582031, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.62, |
| "grad_norm": 9.181481809417177, |
| "learning_rate": 2.5561797752808987e-07, |
| "logps/chosen": -51.8013801574707, |
| "logps/rejected": -63.46139907836914, |
| "loss": 0.5059, |
| "losses/dpo": 0.14845682680606842, |
| "losses/sft": 1.4499884843826294, |
| "losses/total": 0.14845682680606842, |
| "ref_logps/chosen": -42.47813415527344, |
| "ref_logps/rejected": -46.913028717041016, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.9323242902755737, |
| "rewards/margins": 0.7225131988525391, |
| "rewards/rejected": -1.6548374891281128, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.62, |
| "grad_norm": 15.612026068166484, |
| "learning_rate": 2.5421348314606737e-07, |
| "logps/chosen": -53.003204345703125, |
| "logps/rejected": -66.69940948486328, |
| "loss": 0.4433, |
| "losses/dpo": 0.37596985697746277, |
| "losses/sft": 1.6896804571151733, |
| "losses/total": 0.37596985697746277, |
| "ref_logps/chosen": -43.648799896240234, |
| "ref_logps/rejected": -48.52040481567383, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -0.9354407787322998, |
| "rewards/margins": 0.8824598789215088, |
| "rewards/rejected": -1.8179006576538086, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.63, |
| "grad_norm": 9.017380971745943, |
| "learning_rate": 2.5280898876404493e-07, |
| "logps/chosen": -53.18143081665039, |
| "logps/rejected": -68.83674621582031, |
| "loss": 0.5005, |
| "losses/dpo": 0.9546025395393372, |
| "losses/sft": 2.2383298873901367, |
| "losses/total": 0.9546025395393372, |
| "ref_logps/chosen": -42.14701843261719, |
| "ref_logps/rejected": -50.13629913330078, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -1.103441834449768, |
| "rewards/margins": 0.7666029334068298, |
| "rewards/rejected": -1.8700445890426636, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.64, |
| "grad_norm": 7.002144270086572, |
| "learning_rate": 2.5140449438202243e-07, |
| "logps/chosen": -39.812992095947266, |
| "logps/rejected": -58.04911804199219, |
| "loss": 0.4312, |
| "losses/dpo": 0.3237588107585907, |
| "losses/sft": 1.9024207592010498, |
| "losses/total": 0.3237588107585907, |
| "ref_logps/chosen": -31.948955535888672, |
| "ref_logps/rejected": -40.56779479980469, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.7864038348197937, |
| "rewards/margins": 0.9617283940315247, |
| "rewards/rejected": -1.7481321096420288, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 8.018556807115827, |
| "learning_rate": 2.5e-07, |
| "logps/chosen": -47.59326934814453, |
| "logps/rejected": -64.22843170166016, |
| "loss": 0.4616, |
| "losses/dpo": 0.3011893630027771, |
| "losses/sft": 1.4501286745071411, |
| "losses/total": 0.3011893630027771, |
| "ref_logps/chosen": -37.48702621459961, |
| "ref_logps/rejected": -45.30727767944336, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -1.0106242895126343, |
| "rewards/margins": 0.8814913630485535, |
| "rewards/rejected": -1.892115592956543, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 9.421297346982232, |
| "learning_rate": 2.485955056179775e-07, |
| "logps/chosen": -52.299922943115234, |
| "logps/rejected": -62.2135009765625, |
| "loss": 0.5647, |
| "losses/dpo": 0.6175757050514221, |
| "losses/sft": 2.058591365814209, |
| "losses/total": 0.6175757050514221, |
| "ref_logps/chosen": -41.575767517089844, |
| "ref_logps/rejected": -45.39363479614258, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.0724154710769653, |
| "rewards/margins": 0.6095717549324036, |
| "rewards/rejected": -1.6819872856140137, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.66, |
| "grad_norm": 10.845286708552022, |
| "learning_rate": 2.4719101123595505e-07, |
| "logps/chosen": -49.824981689453125, |
| "logps/rejected": -61.322139739990234, |
| "loss": 0.4823, |
| "losses/dpo": 0.5226192474365234, |
| "losses/sft": 1.8831043243408203, |
| "losses/total": 0.5226192474365234, |
| "ref_logps/chosen": -39.458709716796875, |
| "ref_logps/rejected": -43.33076477050781, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -1.0366266965866089, |
| "rewards/margins": 0.7625109553337097, |
| "rewards/rejected": -1.7991377115249634, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 7.736788176006532, |
| "learning_rate": 2.4578651685393255e-07, |
| "logps/chosen": -48.30406951904297, |
| "logps/rejected": -62.53293228149414, |
| "loss": 0.4473, |
| "losses/dpo": 0.24700236320495605, |
| "losses/sft": 1.5504862070083618, |
| "losses/total": 0.24700236320495605, |
| "ref_logps/chosen": -38.438228607177734, |
| "ref_logps/rejected": -43.876705169677734, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -0.9865838289260864, |
| "rewards/margins": 0.8790390491485596, |
| "rewards/rejected": -1.8656229972839355, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.68, |
| "grad_norm": 7.9918716943613894, |
| "learning_rate": 2.443820224719101e-07, |
| "logps/chosen": -46.76765441894531, |
| "logps/rejected": -63.072418212890625, |
| "loss": 0.4597, |
| "losses/dpo": 0.5004298686981201, |
| "losses/sft": 2.039869785308838, |
| "losses/total": 0.5004298686981201, |
| "ref_logps/chosen": -37.515079498291016, |
| "ref_logps/rejected": -44.76315689086914, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.9252572655677795, |
| "rewards/margins": 0.9056685566902161, |
| "rewards/rejected": -1.8309259414672852, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.68, |
| "grad_norm": 8.812574527536418, |
| "learning_rate": 2.429775280898876e-07, |
| "logps/chosen": -48.68953323364258, |
| "logps/rejected": -70.00579071044922, |
| "loss": 0.4842, |
| "losses/dpo": 0.5064282417297363, |
| "losses/sft": 1.860622763633728, |
| "losses/total": 0.5064282417297363, |
| "ref_logps/chosen": -37.93241500854492, |
| "ref_logps/rejected": -49.59477996826172, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -1.0757118463516235, |
| "rewards/margins": 0.9653894305229187, |
| "rewards/rejected": -2.0411009788513184, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.69, |
| "grad_norm": 8.262706110631338, |
| "learning_rate": 2.4157303370786517e-07, |
| "logps/chosen": -51.853031158447266, |
| "logps/rejected": -66.55961608886719, |
| "loss": 0.4643, |
| "losses/dpo": 0.32682162523269653, |
| "losses/sft": 2.5111522674560547, |
| "losses/total": 0.32682162523269653, |
| "ref_logps/chosen": -42.025306701660156, |
| "ref_logps/rejected": -47.904380798339844, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.9827719330787659, |
| "rewards/margins": 0.8827516436576843, |
| "rewards/rejected": -1.8655235767364502, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 9.91340992789526, |
| "learning_rate": 2.401685393258427e-07, |
| "logps/chosen": -51.620609283447266, |
| "logps/rejected": -61.01588821411133, |
| "loss": 0.5382, |
| "losses/dpo": 0.47428151965141296, |
| "losses/sft": 1.9888023138046265, |
| "losses/total": 0.47428151965141296, |
| "ref_logps/chosen": -40.93183135986328, |
| "ref_logps/rejected": -43.67429733276367, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -1.0688778162002563, |
| "rewards/margins": 0.6652814745903015, |
| "rewards/rejected": -1.7341593503952026, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.71, |
| "grad_norm": 8.353660910725022, |
| "learning_rate": 2.3876404494382023e-07, |
| "logps/chosen": -49.402103424072266, |
| "logps/rejected": -61.825992584228516, |
| "loss": 0.4796, |
| "losses/dpo": 0.4575900137424469, |
| "losses/sft": 2.0813965797424316, |
| "losses/total": 0.4575900137424469, |
| "ref_logps/chosen": -39.205528259277344, |
| "ref_logps/rejected": -43.332237243652344, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -1.019657850265503, |
| "rewards/margins": 0.8297175765037537, |
| "rewards/rejected": -1.8493753671646118, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.71, |
| "grad_norm": 8.489694346068541, |
| "learning_rate": 2.3735955056179774e-07, |
| "logps/chosen": -49.869972229003906, |
| "logps/rejected": -61.926387786865234, |
| "loss": 0.5007, |
| "losses/dpo": 0.5196319222450256, |
| "losses/sft": 2.3197567462921143, |
| "losses/total": 0.5196319222450256, |
| "ref_logps/chosen": -40.345924377441406, |
| "ref_logps/rejected": -43.80282211303711, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -0.9524051547050476, |
| "rewards/margins": 0.8599514365196228, |
| "rewards/rejected": -1.8123565912246704, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 8.455711622023179, |
| "learning_rate": 2.3595505617977527e-07, |
| "logps/chosen": -48.83377456665039, |
| "logps/rejected": -62.39826202392578, |
| "loss": 0.4853, |
| "losses/dpo": 0.33744513988494873, |
| "losses/sft": 2.174797296524048, |
| "losses/total": 0.33744513988494873, |
| "ref_logps/chosen": -38.7586555480957, |
| "ref_logps/rejected": -44.35865783691406, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.0075117349624634, |
| "rewards/margins": 0.7964487075805664, |
| "rewards/rejected": -1.8039604425430298, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.73, |
| "grad_norm": 8.501384419530382, |
| "learning_rate": 2.345505617977528e-07, |
| "logps/chosen": -46.079429626464844, |
| "logps/rejected": -62.38417053222656, |
| "loss": 0.4593, |
| "losses/dpo": 0.45904022455215454, |
| "losses/sft": 2.14939546585083, |
| "losses/total": 0.45904022455215454, |
| "ref_logps/chosen": -37.05137634277344, |
| "ref_logps/rejected": -44.551025390625, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.9028058052062988, |
| "rewards/margins": 0.8805083632469177, |
| "rewards/rejected": -1.7833141088485718, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.74, |
| "grad_norm": 8.42568627673927, |
| "learning_rate": 2.331460674157303e-07, |
| "logps/chosen": -53.63593292236328, |
| "logps/rejected": -67.56592559814453, |
| "loss": 0.4141, |
| "losses/dpo": 0.35481470823287964, |
| "losses/sft": 1.9238269329071045, |
| "losses/total": 0.35481470823287964, |
| "ref_logps/chosen": -43.60770797729492, |
| "ref_logps/rejected": -47.71710205078125, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -1.0028222799301147, |
| "rewards/margins": 0.9820606112480164, |
| "rewards/rejected": -1.9848829507827759, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.74, |
| "grad_norm": 8.985097587921354, |
| "learning_rate": 2.3174157303370786e-07, |
| "logps/chosen": -49.890769958496094, |
| "logps/rejected": -64.36235046386719, |
| "loss": 0.5005, |
| "losses/dpo": 0.4699353277683258, |
| "losses/sft": 1.5774719715118408, |
| "losses/total": 0.4699353277683258, |
| "ref_logps/chosen": -39.884613037109375, |
| "ref_logps/rejected": -46.45123291015625, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.0006158351898193, |
| "rewards/margins": 0.7904958724975586, |
| "rewards/rejected": -1.7911114692687988, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 8.37260934759885, |
| "learning_rate": 2.303370786516854e-07, |
| "logps/chosen": -47.88700485229492, |
| "logps/rejected": -65.87992095947266, |
| "loss": 0.4563, |
| "losses/dpo": 0.4289396107196808, |
| "losses/sft": 1.9490468502044678, |
| "losses/total": 0.4289396107196808, |
| "ref_logps/chosen": -38.816673278808594, |
| "ref_logps/rejected": -48.137718200683594, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -0.9070336222648621, |
| "rewards/margins": 0.8671874403953552, |
| "rewards/rejected": -1.7742209434509277, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 8.439180796005859, |
| "learning_rate": 2.2893258426966292e-07, |
| "logps/chosen": -47.75567626953125, |
| "logps/rejected": -59.802364349365234, |
| "loss": 0.5339, |
| "losses/dpo": 0.3888584077358246, |
| "losses/sft": 1.453101396560669, |
| "losses/total": 0.3888584077358246, |
| "ref_logps/chosen": -38.37287521362305, |
| "ref_logps/rejected": -43.31857681274414, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.9382801055908203, |
| "rewards/margins": 0.7100984454154968, |
| "rewards/rejected": -1.6483784914016724, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.77, |
| "grad_norm": 8.820079708795298, |
| "learning_rate": 2.2752808988764045e-07, |
| "logps/chosen": -53.505958557128906, |
| "logps/rejected": -66.17211151123047, |
| "loss": 0.4792, |
| "losses/dpo": 0.5977008938789368, |
| "losses/sft": 1.5070393085479736, |
| "losses/total": 0.5977008938789368, |
| "ref_logps/chosen": -42.90943145751953, |
| "ref_logps/rejected": -47.009498596191406, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -1.0596526861190796, |
| "rewards/margins": 0.8566086292266846, |
| "rewards/rejected": -1.9162614345550537, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.77, |
| "grad_norm": 8.954294680805216, |
| "learning_rate": 2.2612359550561795e-07, |
| "logps/chosen": -51.98886489868164, |
| "logps/rejected": -65.2154769897461, |
| "loss": 0.4863, |
| "losses/dpo": 0.4061537981033325, |
| "losses/sft": 1.7505851984024048, |
| "losses/total": 0.4061537981033325, |
| "ref_logps/chosen": -41.45209503173828, |
| "ref_logps/rejected": -46.05805969238281, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -1.0536775588989258, |
| "rewards/margins": 0.8620648980140686, |
| "rewards/rejected": -1.9157423973083496, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.78, |
| "grad_norm": 8.276096992832155, |
| "learning_rate": 2.2471910112359549e-07, |
| "logps/chosen": -46.52098846435547, |
| "logps/rejected": -60.28398895263672, |
| "loss": 0.4849, |
| "losses/dpo": 0.4493502676486969, |
| "losses/sft": 1.8399590253829956, |
| "losses/total": 0.4493502676486969, |
| "ref_logps/chosen": -37.41412353515625, |
| "ref_logps/rejected": -42.95138931274414, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -0.9106867909431458, |
| "rewards/margins": 0.8225734233856201, |
| "rewards/rejected": -1.733260154724121, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.79, |
| "grad_norm": 9.148667637325849, |
| "learning_rate": 2.2331460674157302e-07, |
| "logps/chosen": -49.735443115234375, |
| "logps/rejected": -68.21788787841797, |
| "loss": 0.49, |
| "losses/dpo": 0.2873924672603607, |
| "losses/sft": 1.5266362428665161, |
| "losses/total": 0.2873924672603607, |
| "ref_logps/chosen": -39.63804244995117, |
| "ref_logps/rejected": -49.29356384277344, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.0097399950027466, |
| "rewards/margins": 0.8826919198036194, |
| "rewards/rejected": -1.8924317359924316, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 8.143852894216051, |
| "learning_rate": 2.2191011235955055e-07, |
| "logps/chosen": -48.24225616455078, |
| "logps/rejected": -63.4071044921875, |
| "loss": 0.4505, |
| "losses/dpo": 0.4800674617290497, |
| "losses/sft": 1.840663194656372, |
| "losses/total": 0.4800674617290497, |
| "ref_logps/chosen": -38.12638854980469, |
| "ref_logps/rejected": -44.40414047241211, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -1.0115869045257568, |
| "rewards/margins": 0.8887090682983398, |
| "rewards/rejected": -1.9002958536148071, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 8.978669612809192, |
| "learning_rate": 2.205056179775281e-07, |
| "logps/chosen": -49.006935119628906, |
| "logps/rejected": -64.8754653930664, |
| "loss": 0.4701, |
| "losses/dpo": 0.3446974754333496, |
| "losses/sft": 1.800228476524353, |
| "losses/total": 0.3446974754333496, |
| "ref_logps/chosen": -39.40437698364258, |
| "ref_logps/rejected": -46.17237091064453, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.9602562189102173, |
| "rewards/margins": 0.9100530743598938, |
| "rewards/rejected": -1.8703094720840454, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.81, |
| "grad_norm": 9.39799976416451, |
| "learning_rate": 2.191011235955056e-07, |
| "logps/chosen": -49.0966682434082, |
| "logps/rejected": -62.466331481933594, |
| "loss": 0.4979, |
| "losses/dpo": 0.7114862203598022, |
| "losses/sft": 1.6450343132019043, |
| "losses/total": 0.7114862203598022, |
| "ref_logps/chosen": -38.396942138671875, |
| "ref_logps/rejected": -43.12867736816406, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -1.0699726343154907, |
| "rewards/margins": 0.8637927770614624, |
| "rewards/rejected": -1.9337654113769531, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.82, |
| "grad_norm": 7.9075518471198585, |
| "learning_rate": 2.1769662921348314e-07, |
| "logps/chosen": -49.44485092163086, |
| "logps/rejected": -64.67866516113281, |
| "loss": 0.4424, |
| "losses/dpo": 0.3071708679199219, |
| "losses/sft": 2.3602986335754395, |
| "losses/total": 0.3071708679199219, |
| "ref_logps/chosen": -39.44845199584961, |
| "ref_logps/rejected": -45.48222351074219, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.999640703201294, |
| "rewards/margins": 0.9200041890144348, |
| "rewards/rejected": -1.919644832611084, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.83, |
| "grad_norm": 8.785359064590306, |
| "learning_rate": 2.1629213483146067e-07, |
| "logps/chosen": -53.305233001708984, |
| "logps/rejected": -65.16683959960938, |
| "loss": 0.4948, |
| "losses/dpo": 0.5694843530654907, |
| "losses/sft": 1.6856664419174194, |
| "losses/total": 0.5694843530654907, |
| "ref_logps/chosen": -42.595603942871094, |
| "ref_logps/rejected": -46.201744079589844, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -1.0709630250930786, |
| "rewards/margins": 0.8255467414855957, |
| "rewards/rejected": -1.8965098857879639, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.83, |
| "grad_norm": 9.446502314051532, |
| "learning_rate": 2.148876404494382e-07, |
| "logps/chosen": -48.44091033935547, |
| "logps/rejected": -60.52621078491211, |
| "loss": 0.526, |
| "losses/dpo": 0.45424705743789673, |
| "losses/sft": 2.151371479034424, |
| "losses/total": 0.45424705743789673, |
| "ref_logps/chosen": -38.4775390625, |
| "ref_logps/rejected": -43.26105499267578, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.9963367581367493, |
| "rewards/margins": 0.730178952217102, |
| "rewards/rejected": -1.726515531539917, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.84, |
| "grad_norm": 8.81696350872476, |
| "learning_rate": 2.134831460674157e-07, |
| "logps/chosen": -50.6672477722168, |
| "logps/rejected": -66.28202819824219, |
| "loss": 0.4731, |
| "losses/dpo": 0.3640851080417633, |
| "losses/sft": 1.5808416604995728, |
| "losses/total": 0.3640851080417633, |
| "ref_logps/chosen": -40.499908447265625, |
| "ref_logps/rejected": -47.59101486206055, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.0167338848114014, |
| "rewards/margins": 0.852367103099823, |
| "rewards/rejected": -1.8691009283065796, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 9.28428850947705, |
| "learning_rate": 2.1207865168539323e-07, |
| "logps/chosen": -54.07783889770508, |
| "logps/rejected": -65.76669311523438, |
| "loss": 0.5163, |
| "losses/dpo": 0.8544118404388428, |
| "losses/sft": 1.9635812044143677, |
| "losses/total": 0.8544118404388428, |
| "ref_logps/chosen": -42.8679084777832, |
| "ref_logps/rejected": -46.79513931274414, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -1.120992660522461, |
| "rewards/margins": 0.7761632204055786, |
| "rewards/rejected": -1.897156000137329, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.86, |
| "grad_norm": 9.752420565118218, |
| "learning_rate": 2.1067415730337076e-07, |
| "logps/chosen": -49.50055694580078, |
| "logps/rejected": -65.74305725097656, |
| "loss": 0.4852, |
| "losses/dpo": 0.23906151950359344, |
| "losses/sft": 1.417936086654663, |
| "losses/total": 0.23906151950359344, |
| "ref_logps/chosen": -39.56950378417969, |
| "ref_logps/rejected": -47.11359405517578, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.9931050539016724, |
| "rewards/margins": 0.8698407411575317, |
| "rewards/rejected": -1.8629456758499146, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.86, |
| "grad_norm": 9.475549639179285, |
| "learning_rate": 2.0926966292134832e-07, |
| "logps/chosen": -49.32406997680664, |
| "logps/rejected": -63.67836380004883, |
| "loss": 0.5131, |
| "losses/dpo": 0.4199092984199524, |
| "losses/sft": 1.6027624607086182, |
| "losses/total": 0.4199092984199524, |
| "ref_logps/chosen": -39.44010925292969, |
| "ref_logps/rejected": -46.58472442626953, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.988396942615509, |
| "rewards/margins": 0.7209669947624207, |
| "rewards/rejected": -1.7093638181686401, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.87, |
| "grad_norm": 8.691211910246372, |
| "learning_rate": 2.0786516853932585e-07, |
| "logps/chosen": -50.76154327392578, |
| "logps/rejected": -63.2697868347168, |
| "loss": 0.4931, |
| "losses/dpo": 0.48489272594451904, |
| "losses/sft": 2.4353795051574707, |
| "losses/total": 0.48489272594451904, |
| "ref_logps/chosen": -40.483741760253906, |
| "ref_logps/rejected": -45.29812240600586, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.0277801752090454, |
| "rewards/margins": 0.7693858742713928, |
| "rewards/rejected": -1.7971662282943726, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 9.872927469509172, |
| "learning_rate": 2.0646067415730336e-07, |
| "logps/chosen": -49.138465881347656, |
| "logps/rejected": -66.16885375976562, |
| "loss": 0.5055, |
| "losses/dpo": 0.5177363753318787, |
| "losses/sft": 1.834108829498291, |
| "losses/total": 0.5177363753318787, |
| "ref_logps/chosen": -38.63552474975586, |
| "ref_logps/rejected": -48.135929107666016, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -1.050294280052185, |
| "rewards/margins": 0.7529983520507812, |
| "rewards/rejected": -1.8032926321029663, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.89, |
| "grad_norm": 9.265714326509956, |
| "learning_rate": 2.0505617977528089e-07, |
| "logps/chosen": -48.308753967285156, |
| "logps/rejected": -66.52729034423828, |
| "loss": 0.486, |
| "losses/dpo": 0.6348212957382202, |
| "losses/sft": 1.772031307220459, |
| "losses/total": 0.6348212957382202, |
| "ref_logps/chosen": -38.7848014831543, |
| "ref_logps/rejected": -48.895755767822266, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -0.9523951411247253, |
| "rewards/margins": 0.8107584714889526, |
| "rewards/rejected": -1.7631536722183228, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.89, |
| "grad_norm": 9.119904977454695, |
| "learning_rate": 2.0365168539325842e-07, |
| "logps/chosen": -50.29534912109375, |
| "logps/rejected": -64.1716537475586, |
| "loss": 0.4977, |
| "losses/dpo": 0.522094190120697, |
| "losses/sft": 1.8622956275939941, |
| "losses/total": 0.522094190120697, |
| "ref_logps/chosen": -39.614479064941406, |
| "ref_logps/rejected": -45.53257751464844, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -1.0680873394012451, |
| "rewards/margins": 0.7958202958106995, |
| "rewards/rejected": -1.8639075756072998, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 8.300519651321538, |
| "learning_rate": 2.0224719101123595e-07, |
| "logps/chosen": -48.15717315673828, |
| "logps/rejected": -66.43309783935547, |
| "loss": 0.4749, |
| "losses/dpo": 0.7441291809082031, |
| "losses/sft": 2.440709352493286, |
| "losses/total": 0.7441291809082031, |
| "ref_logps/chosen": -38.77737808227539, |
| "ref_logps/rejected": -47.56039047241211, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -0.9379786252975464, |
| "rewards/margins": 0.9492916464805603, |
| "rewards/rejected": -1.887270212173462, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.91, |
| "grad_norm": 9.317842509294723, |
| "learning_rate": 2.0084269662921348e-07, |
| "logps/chosen": -44.09011459350586, |
| "logps/rejected": -59.16832733154297, |
| "loss": 0.5178, |
| "losses/dpo": 0.8791393041610718, |
| "losses/sft": 2.401695489883423, |
| "losses/total": 0.8791393041610718, |
| "ref_logps/chosen": -35.33125305175781, |
| "ref_logps/rejected": -43.098087310791016, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.8758863210678101, |
| "rewards/margins": 0.7311373353004456, |
| "rewards/rejected": -1.60702383518219, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 8.201438226460152, |
| "learning_rate": 1.9943820224719098e-07, |
| "logps/chosen": -51.51540756225586, |
| "logps/rejected": -67.69857788085938, |
| "loss": 0.4104, |
| "losses/dpo": 0.48046156764030457, |
| "losses/sft": 1.7079527378082275, |
| "losses/total": 0.48046156764030457, |
| "ref_logps/chosen": -42.20224380493164, |
| "ref_logps/rejected": -47.85743713378906, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -0.9313161373138428, |
| "rewards/margins": 1.0527985095977783, |
| "rewards/rejected": -1.9841147661209106, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 9.182124453243851, |
| "learning_rate": 1.9803370786516854e-07, |
| "logps/chosen": -52.86988067626953, |
| "logps/rejected": -64.82228088378906, |
| "loss": 0.4958, |
| "losses/dpo": 0.4478102922439575, |
| "losses/sft": 1.8136005401611328, |
| "losses/total": 0.4478102922439575, |
| "ref_logps/chosen": -43.32461929321289, |
| "ref_logps/rejected": -48.31917190551758, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -0.9545266032218933, |
| "rewards/margins": 0.6957840323448181, |
| "rewards/rejected": -1.650310754776001, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.93, |
| "grad_norm": 8.664903887618609, |
| "learning_rate": 1.9662921348314607e-07, |
| "logps/chosen": -47.40139389038086, |
| "logps/rejected": -61.24211502075195, |
| "loss": 0.5069, |
| "losses/dpo": 0.45637544989585876, |
| "losses/sft": 2.080510139465332, |
| "losses/total": 0.45637544989585876, |
| "ref_logps/chosen": -37.96201705932617, |
| "ref_logps/rejected": -44.53689956665039, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.9439379572868347, |
| "rewards/margins": 0.7265833616256714, |
| "rewards/rejected": -1.6705212593078613, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.94, |
| "grad_norm": 8.357609816704013, |
| "learning_rate": 1.952247191011236e-07, |
| "logps/chosen": -51.97878646850586, |
| "logps/rejected": -66.72062683105469, |
| "loss": 0.4312, |
| "losses/dpo": 0.3072975277900696, |
| "losses/sft": 1.8392665386199951, |
| "losses/total": 0.3072975277900696, |
| "ref_logps/chosen": -42.230186462402344, |
| "ref_logps/rejected": -47.218257904052734, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.9748601913452148, |
| "rewards/margins": 0.9753769040107727, |
| "rewards/rejected": -1.9502369165420532, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 8.434371561430744, |
| "learning_rate": 1.938202247191011e-07, |
| "logps/chosen": -50.701534271240234, |
| "logps/rejected": -65.6552734375, |
| "loss": 0.4402, |
| "losses/dpo": 0.4483451545238495, |
| "losses/sft": 1.6883811950683594, |
| "losses/total": 0.4483451545238495, |
| "ref_logps/chosen": -40.96206283569336, |
| "ref_logps/rejected": -46.52254104614258, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -0.9739474654197693, |
| "rewards/margins": 0.9393259286880493, |
| "rewards/rejected": -1.9132733345031738, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 8.326106947840566, |
| "learning_rate": 1.9241573033707863e-07, |
| "logps/chosen": -49.05642318725586, |
| "logps/rejected": -62.65263366699219, |
| "loss": 0.4559, |
| "losses/dpo": 0.43069154024124146, |
| "losses/sft": 2.6451079845428467, |
| "losses/total": 0.43069154024124146, |
| "ref_logps/chosen": -39.344356536865234, |
| "ref_logps/rejected": -43.80147933959961, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.9712071418762207, |
| "rewards/margins": 0.9139088988304138, |
| "rewards/rejected": -1.8851161003112793, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 10.317747735678177, |
| "learning_rate": 1.9101123595505617e-07, |
| "logps/chosen": -47.29045867919922, |
| "logps/rejected": -60.76853942871094, |
| "loss": 0.5233, |
| "losses/dpo": 0.6233609914779663, |
| "losses/sft": 2.166818618774414, |
| "losses/total": 0.6233609914779663, |
| "ref_logps/chosen": -37.4037971496582, |
| "ref_logps/rejected": -43.672698974609375, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -0.9886665344238281, |
| "rewards/margins": 0.7209180593490601, |
| "rewards/rejected": -1.7095845937728882, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.97, |
| "grad_norm": 8.570482678193486, |
| "learning_rate": 1.896067415730337e-07, |
| "logps/chosen": -47.826866149902344, |
| "logps/rejected": -64.69200897216797, |
| "loss": 0.4646, |
| "losses/dpo": 0.39222848415374756, |
| "losses/sft": 1.7622792720794678, |
| "losses/total": 0.39222848415374756, |
| "ref_logps/chosen": -37.465206146240234, |
| "ref_logps/rejected": -45.77225112915039, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.0361659526824951, |
| "rewards/margins": 0.8558104634284973, |
| "rewards/rejected": -1.8919763565063477, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.98, |
| "grad_norm": 8.082044540577316, |
| "learning_rate": 1.8820224719101123e-07, |
| "logps/chosen": -44.3437385559082, |
| "logps/rejected": -65.46928405761719, |
| "loss": 0.4247, |
| "losses/dpo": 0.25466495752334595, |
| "losses/sft": 1.6573872566223145, |
| "losses/total": 0.25466495752334595, |
| "ref_logps/chosen": -35.26055908203125, |
| "ref_logps/rejected": -46.34375, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.908318042755127, |
| "rewards/margins": 1.0042363405227661, |
| "rewards/rejected": -1.9125542640686035, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.98, |
| "grad_norm": 7.886213605680278, |
| "learning_rate": 1.8679775280898876e-07, |
| "logps/chosen": -44.65882873535156, |
| "logps/rejected": -65.91732025146484, |
| "loss": 0.4233, |
| "losses/dpo": 0.37609466910362244, |
| "losses/sft": 1.7309682369232178, |
| "losses/total": 0.37609466910362244, |
| "ref_logps/chosen": -34.98197555541992, |
| "ref_logps/rejected": -45.339569091796875, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.9676854610443115, |
| "rewards/margins": 1.0900897979736328, |
| "rewards/rejected": -2.0577754974365234, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.99, |
| "grad_norm": 9.420349702400047, |
| "learning_rate": 1.853932584269663e-07, |
| "logps/chosen": -55.511512756347656, |
| "logps/rejected": -67.9210205078125, |
| "loss": 0.4846, |
| "losses/dpo": 0.35744136571884155, |
| "losses/sft": 1.9689966440200806, |
| "losses/total": 0.35744136571884155, |
| "ref_logps/chosen": -44.93099594116211, |
| "ref_logps/rejected": -48.529052734375, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -1.058051347732544, |
| "rewards/margins": 0.8811461925506592, |
| "rewards/rejected": -1.9391975402832031, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 8.90872581509833, |
| "learning_rate": 1.8398876404494382e-07, |
| "logps/chosen": -50.006412506103516, |
| "logps/rejected": -65.82926940917969, |
| "loss": 0.4658, |
| "losses/dpo": 0.42064571380615234, |
| "losses/sft": 1.965097427368164, |
| "losses/total": 0.42064571380615234, |
| "ref_logps/chosen": -39.1809196472168, |
| "ref_logps/rejected": -45.722938537597656, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -1.0825499296188354, |
| "rewards/margins": 0.9280825853347778, |
| "rewards/rejected": -2.0106325149536133, |
| "step": 265 |
| }, |
| { |
| "epoch": 2.01, |
| "grad_norm": 7.965650717330079, |
| "learning_rate": 1.8258426966292135e-07, |
| "logps/chosen": -50.00637435913086, |
| "logps/rejected": -65.08876037597656, |
| "loss": 0.4116, |
| "losses/dpo": 0.36081230640411377, |
| "losses/sft": 2.0271382331848145, |
| "losses/total": 0.36081230640411377, |
| "ref_logps/chosen": -39.65681076049805, |
| "ref_logps/rejected": -44.160343170166016, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -1.0349565744400024, |
| "rewards/margins": 1.0578850507736206, |
| "rewards/rejected": -2.092841625213623, |
| "step": 266 |
| }, |
| { |
| "epoch": 2.02, |
| "grad_norm": 8.213010773638015, |
| "learning_rate": 1.8117977528089888e-07, |
| "logps/chosen": -49.70448303222656, |
| "logps/rejected": -64.45352172851562, |
| "loss": 0.4328, |
| "losses/dpo": 0.4751141667366028, |
| "losses/sft": 2.163590431213379, |
| "losses/total": 0.4751141667366028, |
| "ref_logps/chosen": -39.66984558105469, |
| "ref_logps/rejected": -45.02206802368164, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -1.0034637451171875, |
| "rewards/margins": 0.9396811723709106, |
| "rewards/rejected": -1.943144679069519, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.02, |
| "grad_norm": 8.627785952714778, |
| "learning_rate": 1.7977528089887638e-07, |
| "logps/chosen": -47.43844985961914, |
| "logps/rejected": -61.57966995239258, |
| "loss": 0.4366, |
| "losses/dpo": 0.4777096211910248, |
| "losses/sft": 2.011448860168457, |
| "losses/total": 0.4777096211910248, |
| "ref_logps/chosen": -37.887752532958984, |
| "ref_logps/rejected": -42.577537536621094, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -0.9550699591636658, |
| "rewards/margins": 0.945143461227417, |
| "rewards/rejected": -1.9002132415771484, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.03, |
| "grad_norm": 9.54969996951565, |
| "learning_rate": 1.7837078651685391e-07, |
| "logps/chosen": -50.50410842895508, |
| "logps/rejected": -64.97512817382812, |
| "loss": 0.4367, |
| "losses/dpo": 0.4186415672302246, |
| "losses/sft": 2.4884321689605713, |
| "losses/total": 0.4186415672302246, |
| "ref_logps/chosen": -39.366878509521484, |
| "ref_logps/rejected": -44.09857940673828, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -1.1137233972549438, |
| "rewards/margins": 0.973931610584259, |
| "rewards/rejected": -2.0876548290252686, |
| "step": 269 |
| }, |
| { |
| "epoch": 2.04, |
| "grad_norm": 7.541865642493473, |
| "learning_rate": 1.7696629213483144e-07, |
| "logps/chosen": -53.07981491088867, |
| "logps/rejected": -69.67108917236328, |
| "loss": 0.3686, |
| "losses/dpo": 0.23445191979408264, |
| "losses/sft": 1.9627153873443604, |
| "losses/total": 0.23445191979408264, |
| "ref_logps/chosen": -41.86531448364258, |
| "ref_logps/rejected": -46.77862548828125, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -1.121450424194336, |
| "rewards/margins": 1.1677953004837036, |
| "rewards/rejected": -2.289245843887329, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.05, |
| "grad_norm": 7.78557132067994, |
| "learning_rate": 1.75561797752809e-07, |
| "logps/chosen": -44.59587478637695, |
| "logps/rejected": -61.11756896972656, |
| "loss": 0.4304, |
| "losses/dpo": 0.40728461742401123, |
| "losses/sft": 2.1670854091644287, |
| "losses/total": 0.40728461742401123, |
| "ref_logps/chosen": -34.50210189819336, |
| "ref_logps/rejected": -41.35382843017578, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -1.009376883506775, |
| "rewards/margins": 0.9669971466064453, |
| "rewards/rejected": -1.9763740301132202, |
| "step": 271 |
| }, |
| { |
| "epoch": 2.05, |
| "grad_norm": 8.691528110147415, |
| "learning_rate": 1.741573033707865e-07, |
| "logps/chosen": -47.1044807434082, |
| "logps/rejected": -62.402366638183594, |
| "loss": 0.4439, |
| "losses/dpo": 0.6048084497451782, |
| "losses/sft": 2.738722801208496, |
| "losses/total": 0.6048084497451782, |
| "ref_logps/chosen": -36.921566009521484, |
| "ref_logps/rejected": -41.66175842285156, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.0182914733886719, |
| "rewards/margins": 1.0557701587677002, |
| "rewards/rejected": -2.074061632156372, |
| "step": 272 |
| }, |
| { |
| "epoch": 2.06, |
| "grad_norm": 7.3620175100007135, |
| "learning_rate": 1.7275280898876404e-07, |
| "logps/chosen": -51.797447204589844, |
| "logps/rejected": -69.3549575805664, |
| "loss": 0.3635, |
| "losses/dpo": 0.4010230600833893, |
| "losses/sft": 1.782325029373169, |
| "losses/total": 0.4010230600833893, |
| "ref_logps/chosen": -41.61540222167969, |
| "ref_logps/rejected": -47.2431526184082, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -1.0182045698165894, |
| "rewards/margins": 1.192975640296936, |
| "rewards/rejected": -2.2111802101135254, |
| "step": 273 |
| }, |
| { |
| "epoch": 2.07, |
| "grad_norm": 8.804306158193548, |
| "learning_rate": 1.7134831460674157e-07, |
| "logps/chosen": -51.407501220703125, |
| "logps/rejected": -64.26744079589844, |
| "loss": 0.4578, |
| "losses/dpo": 0.6319560408592224, |
| "losses/sft": 2.214840888977051, |
| "losses/total": 0.6319560408592224, |
| "ref_logps/chosen": -40.07988739013672, |
| "ref_logps/rejected": -43.7442741394043, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -1.1327617168426514, |
| "rewards/margins": 0.919555127620697, |
| "rewards/rejected": -2.052316665649414, |
| "step": 274 |
| }, |
| { |
| "epoch": 2.08, |
| "grad_norm": 9.887135650412674, |
| "learning_rate": 1.699438202247191e-07, |
| "logps/chosen": -48.88153839111328, |
| "logps/rejected": -64.42852783203125, |
| "loss": 0.4198, |
| "losses/dpo": 0.36265987157821655, |
| "losses/sft": 2.2624428272247314, |
| "losses/total": 0.36265987157821655, |
| "ref_logps/chosen": -38.071693420410156, |
| "ref_logps/rejected": -42.96941375732422, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -1.080984115600586, |
| "rewards/margins": 1.064927339553833, |
| "rewards/rejected": -2.145911455154419, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.08, |
| "grad_norm": 7.789984027153804, |
| "learning_rate": 1.6853932584269663e-07, |
| "logps/chosen": -53.47975158691406, |
| "logps/rejected": -69.208251953125, |
| "loss": 0.4077, |
| "losses/dpo": 0.3306717872619629, |
| "losses/sft": 1.8213623762130737, |
| "losses/total": 0.3306717872619629, |
| "ref_logps/chosen": -42.26997375488281, |
| "ref_logps/rejected": -47.255218505859375, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -1.1209776401519775, |
| "rewards/margins": 1.0743255615234375, |
| "rewards/rejected": -2.195303440093994, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.09, |
| "grad_norm": 8.577961125042698, |
| "learning_rate": 1.6713483146067413e-07, |
| "logps/chosen": -48.9437255859375, |
| "logps/rejected": -66.896484375, |
| "loss": 0.4237, |
| "losses/dpo": 0.375847727060318, |
| "losses/sft": 1.7302836179733276, |
| "losses/total": 0.375847727060318, |
| "ref_logps/chosen": -38.10498046875, |
| "ref_logps/rejected": -45.20066452026367, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -1.0838744640350342, |
| "rewards/margins": 1.0857088565826416, |
| "rewards/rejected": -2.1695830821990967, |
| "step": 277 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 7.88842347856655, |
| "learning_rate": 1.6573033707865166e-07, |
| "logps/chosen": -50.38557815551758, |
| "logps/rejected": -71.69691467285156, |
| "loss": 0.3884, |
| "losses/dpo": 0.20783157646656036, |
| "losses/sft": 1.65842604637146, |
| "losses/total": 0.20783157646656036, |
| "ref_logps/chosen": -38.84345245361328, |
| "ref_logps/rejected": -47.66883850097656, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -1.1542127132415771, |
| "rewards/margins": 1.2485952377319336, |
| "rewards/rejected": -2.40280818939209, |
| "step": 278 |
| }, |
| { |
| "epoch": 2.11, |
| "grad_norm": 8.27071868454665, |
| "learning_rate": 1.6432584269662922e-07, |
| "logps/chosen": -50.050907135009766, |
| "logps/rejected": -71.91120147705078, |
| "loss": 0.3873, |
| "losses/dpo": 0.36935174465179443, |
| "losses/sft": 2.4310526847839355, |
| "losses/total": 0.36935174465179443, |
| "ref_logps/chosen": -38.60224914550781, |
| "ref_logps/rejected": -48.42378234863281, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -1.1448655128479004, |
| "rewards/margins": 1.2038761377334595, |
| "rewards/rejected": -2.3487415313720703, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.11, |
| "grad_norm": 8.202604558490952, |
| "learning_rate": 1.6292134831460675e-07, |
| "logps/chosen": -49.78974533081055, |
| "logps/rejected": -63.648345947265625, |
| "loss": 0.4287, |
| "losses/dpo": 0.4768953025341034, |
| "losses/sft": 1.486172080039978, |
| "losses/total": 0.4768953025341034, |
| "ref_logps/chosen": -38.987091064453125, |
| "ref_logps/rejected": -42.706016540527344, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -1.0802651643753052, |
| "rewards/margins": 1.0139687061309814, |
| "rewards/rejected": -2.094233751296997, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.12, |
| "grad_norm": 7.6837084768811295, |
| "learning_rate": 1.6151685393258428e-07, |
| "logps/chosen": -46.654842376708984, |
| "logps/rejected": -65.57063293457031, |
| "loss": 0.4002, |
| "losses/dpo": 0.25231242179870605, |
| "losses/sft": 1.6833255290985107, |
| "losses/total": 0.25231242179870605, |
| "ref_logps/chosen": -35.983055114746094, |
| "ref_logps/rejected": -43.31926345825195, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -1.0671789646148682, |
| "rewards/margins": 1.1579577922821045, |
| "rewards/rejected": -2.2251367568969727, |
| "step": 281 |
| }, |
| { |
| "epoch": 2.13, |
| "grad_norm": 7.367492054152066, |
| "learning_rate": 1.6011235955056178e-07, |
| "logps/chosen": -47.970069885253906, |
| "logps/rejected": -67.12972259521484, |
| "loss": 0.3817, |
| "losses/dpo": 0.2698941230773926, |
| "losses/sft": 2.0059823989868164, |
| "losses/total": 0.2698941230773926, |
| "ref_logps/chosen": -37.96726608276367, |
| "ref_logps/rejected": -44.837013244628906, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -1.0002803802490234, |
| "rewards/margins": 1.2289901971817017, |
| "rewards/rejected": -2.2292706966400146, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.14, |
| "grad_norm": 8.198398756823499, |
| "learning_rate": 1.5870786516853931e-07, |
| "logps/chosen": -52.00371551513672, |
| "logps/rejected": -71.66854095458984, |
| "loss": 0.373, |
| "losses/dpo": 0.37371307611465454, |
| "losses/sft": 2.166947841644287, |
| "losses/total": 0.37371307611465454, |
| "ref_logps/chosen": -40.8586540222168, |
| "ref_logps/rejected": -47.34501266479492, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.1145060062408447, |
| "rewards/margins": 1.3178460597991943, |
| "rewards/rejected": -2.432352304458618, |
| "step": 283 |
| }, |
| { |
| "epoch": 2.14, |
| "grad_norm": 7.211631798978324, |
| "learning_rate": 1.5730337078651685e-07, |
| "logps/chosen": -45.30519104003906, |
| "logps/rejected": -67.6802749633789, |
| "loss": 0.3529, |
| "losses/dpo": 0.6495727896690369, |
| "losses/sft": 2.1896119117736816, |
| "losses/total": 0.6495727896690369, |
| "ref_logps/chosen": -35.870975494384766, |
| "ref_logps/rejected": -45.01594924926758, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.9434216022491455, |
| "rewards/margins": 1.3230111598968506, |
| "rewards/rejected": -2.266432523727417, |
| "step": 284 |
| }, |
| { |
| "epoch": 2.15, |
| "grad_norm": 8.73649186021766, |
| "learning_rate": 1.5589887640449438e-07, |
| "logps/chosen": -51.24886703491211, |
| "logps/rejected": -73.5231704711914, |
| "loss": 0.3706, |
| "losses/dpo": 0.2774621248245239, |
| "losses/sft": 2.14704966545105, |
| "losses/total": 0.2774621248245239, |
| "ref_logps/chosen": -39.21043395996094, |
| "ref_logps/rejected": -48.85007858276367, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -1.2038426399230957, |
| "rewards/margins": 1.2634668350219727, |
| "rewards/rejected": -2.4673094749450684, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.16, |
| "grad_norm": 9.965422332405613, |
| "learning_rate": 1.5449438202247188e-07, |
| "logps/chosen": -56.63559341430664, |
| "logps/rejected": -69.31437683105469, |
| "loss": 0.4581, |
| "losses/dpo": 1.019913673400879, |
| "losses/sft": 2.6583502292633057, |
| "losses/total": 1.019913673400879, |
| "ref_logps/chosen": -42.96681594848633, |
| "ref_logps/rejected": -44.904361724853516, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -1.3668776750564575, |
| "rewards/margins": 1.0741242170333862, |
| "rewards/rejected": -2.4410018920898438, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.17, |
| "grad_norm": 9.04029000539786, |
| "learning_rate": 1.5308988764044944e-07, |
| "logps/chosen": -50.918128967285156, |
| "logps/rejected": -67.66094970703125, |
| "loss": 0.4491, |
| "losses/dpo": 0.536713182926178, |
| "losses/sft": 2.0050907135009766, |
| "losses/total": 0.536713182926178, |
| "ref_logps/chosen": -37.77289581298828, |
| "ref_logps/rejected": -43.736061096191406, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.3145227432250977, |
| "rewards/margins": 1.0779664516448975, |
| "rewards/rejected": -2.392489194869995, |
| "step": 287 |
| }, |
| { |
| "epoch": 2.17, |
| "grad_norm": 7.9909005031619476, |
| "learning_rate": 1.5168539325842697e-07, |
| "logps/chosen": -48.0228271484375, |
| "logps/rejected": -72.348876953125, |
| "loss": 0.3569, |
| "losses/dpo": 0.22953173518180847, |
| "losses/sft": 1.752846360206604, |
| "losses/total": 0.22953173518180847, |
| "ref_logps/chosen": -36.91614532470703, |
| "ref_logps/rejected": -47.88288497924805, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -1.1106677055358887, |
| "rewards/margins": 1.3359307050704956, |
| "rewards/rejected": -2.4465982913970947, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.18, |
| "grad_norm": 9.189297221788385, |
| "learning_rate": 1.502808988764045e-07, |
| "logps/chosen": -55.16273880004883, |
| "logps/rejected": -74.13533782958984, |
| "loss": 0.4081, |
| "losses/dpo": 0.39626190066337585, |
| "losses/sft": 2.1484994888305664, |
| "losses/total": 0.39626190066337585, |
| "ref_logps/chosen": -42.12677001953125, |
| "ref_logps/rejected": -48.699642181396484, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -1.3035968542099, |
| "rewards/margins": 1.2399725914001465, |
| "rewards/rejected": -2.543569326400757, |
| "step": 289 |
| }, |
| { |
| "epoch": 2.19, |
| "grad_norm": 8.223258235820202, |
| "learning_rate": 1.4887640449438203e-07, |
| "logps/chosen": -46.8680534362793, |
| "logps/rejected": -64.84513854980469, |
| "loss": 0.3998, |
| "losses/dpo": 0.38319000601768494, |
| "losses/sft": 2.0748698711395264, |
| "losses/total": 0.38319000601768494, |
| "ref_logps/chosen": -35.66654968261719, |
| "ref_logps/rejected": -42.36066818237305, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -1.1201505661010742, |
| "rewards/margins": 1.1282968521118164, |
| "rewards/rejected": -2.2484474182128906, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 9.322598904199673, |
| "learning_rate": 1.4747191011235953e-07, |
| "logps/chosen": -51.94579315185547, |
| "logps/rejected": -76.23491668701172, |
| "loss": 0.4365, |
| "losses/dpo": 0.6639813184738159, |
| "losses/sft": 3.0463194847106934, |
| "losses/total": 0.6639813184738159, |
| "ref_logps/chosen": -37.92943572998047, |
| "ref_logps/rejected": -49.77875518798828, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -1.4016355276107788, |
| "rewards/margins": 1.243980884552002, |
| "rewards/rejected": -2.6456165313720703, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 9.28502050286711, |
| "learning_rate": 1.4606741573033706e-07, |
| "logps/chosen": -51.56089782714844, |
| "logps/rejected": -67.3809814453125, |
| "loss": 0.4295, |
| "losses/dpo": 0.2485855668783188, |
| "losses/sft": 2.5399341583251953, |
| "losses/total": 0.2485855668783188, |
| "ref_logps/chosen": -38.323081970214844, |
| "ref_logps/rejected": -43.60871505737305, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -1.3237823247909546, |
| "rewards/margins": 1.0534443855285645, |
| "rewards/rejected": -2.3772268295288086, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.21, |
| "grad_norm": 10.352671653799021, |
| "learning_rate": 1.446629213483146e-07, |
| "logps/chosen": -60.71104431152344, |
| "logps/rejected": -76.54723358154297, |
| "loss": 0.4205, |
| "losses/dpo": 0.5032411813735962, |
| "losses/sft": 2.2452447414398193, |
| "losses/total": 0.5032411813735962, |
| "ref_logps/chosen": -45.97325897216797, |
| "ref_logps/rejected": -49.70647430419922, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -1.4737780094146729, |
| "rewards/margins": 1.2102973461151123, |
| "rewards/rejected": -2.684075117111206, |
| "step": 293 |
| }, |
| { |
| "epoch": 2.22, |
| "grad_norm": 9.489700991538179, |
| "learning_rate": 1.4325842696629212e-07, |
| "logps/chosen": -51.33311462402344, |
| "logps/rejected": -72.56240844726562, |
| "loss": 0.4041, |
| "losses/dpo": 0.39366570115089417, |
| "losses/sft": 1.5643844604492188, |
| "losses/total": 0.39366570115089417, |
| "ref_logps/chosen": -38.85292053222656, |
| "ref_logps/rejected": -47.41047668457031, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -1.2480189800262451, |
| "rewards/margins": 1.2671747207641602, |
| "rewards/rejected": -2.5151939392089844, |
| "step": 294 |
| }, |
| { |
| "epoch": 2.23, |
| "grad_norm": 8.202282597323313, |
| "learning_rate": 1.4185393258426968e-07, |
| "logps/chosen": -51.06114959716797, |
| "logps/rejected": -74.44361114501953, |
| "loss": 0.3473, |
| "losses/dpo": 0.40783169865608215, |
| "losses/sft": 2.3142831325531006, |
| "losses/total": 0.40783169865608215, |
| "ref_logps/chosen": -39.6861457824707, |
| "ref_logps/rejected": -49.402000427246094, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -1.1375010013580322, |
| "rewards/margins": 1.3666609525680542, |
| "rewards/rejected": -2.504162073135376, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.23, |
| "grad_norm": 8.616438775614668, |
| "learning_rate": 1.4044943820224718e-07, |
| "logps/chosen": -50.71443557739258, |
| "logps/rejected": -67.99952697753906, |
| "loss": 0.4191, |
| "losses/dpo": 0.3447108566761017, |
| "losses/sft": 1.6047589778900146, |
| "losses/total": 0.3447108566761017, |
| "ref_logps/chosen": -38.159549713134766, |
| "ref_logps/rejected": -44.2279052734375, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.2554888725280762, |
| "rewards/margins": 1.12167227268219, |
| "rewards/rejected": -2.3771612644195557, |
| "step": 296 |
| }, |
| { |
| "epoch": 2.24, |
| "grad_norm": 8.42295456953331, |
| "learning_rate": 1.3904494382022472e-07, |
| "logps/chosen": -50.11585235595703, |
| "logps/rejected": -69.99391174316406, |
| "loss": 0.4154, |
| "losses/dpo": 0.6180249452590942, |
| "losses/sft": 1.9767247438430786, |
| "losses/total": 0.6180249452590942, |
| "ref_logps/chosen": -37.855167388916016, |
| "ref_logps/rejected": -45.18976974487305, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -1.2260689735412598, |
| "rewards/margins": 1.2543449401855469, |
| "rewards/rejected": -2.4804139137268066, |
| "step": 297 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 7.916024207524859, |
| "learning_rate": 1.3764044943820225e-07, |
| "logps/chosen": -48.01890563964844, |
| "logps/rejected": -71.55242919921875, |
| "loss": 0.3286, |
| "losses/dpo": 0.39366215467453003, |
| "losses/sft": 1.6183239221572876, |
| "losses/total": 0.39366215467453003, |
| "ref_logps/chosen": -36.65507888793945, |
| "ref_logps/rejected": -45.11772537231445, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.1363829374313354, |
| "rewards/margins": 1.5070867538452148, |
| "rewards/rejected": -2.64346981048584, |
| "step": 298 |
| }, |
| { |
| "epoch": 2.26, |
| "grad_norm": 8.381587007100304, |
| "learning_rate": 1.3623595505617978e-07, |
| "logps/chosen": -48.97580337524414, |
| "logps/rejected": -71.05843353271484, |
| "loss": 0.369, |
| "losses/dpo": 0.4669285714626312, |
| "losses/sft": 2.4736876487731934, |
| "losses/total": 0.4669285714626312, |
| "ref_logps/chosen": -36.59314727783203, |
| "ref_logps/rejected": -46.38197708129883, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -1.2382655143737793, |
| "rewards/margins": 1.229379415512085, |
| "rewards/rejected": -2.4676451683044434, |
| "step": 299 |
| }, |
| { |
| "epoch": 2.26, |
| "grad_norm": 7.816178695034966, |
| "learning_rate": 1.3483146067415728e-07, |
| "logps/chosen": -48.929466247558594, |
| "logps/rejected": -70.30461883544922, |
| "loss": 0.3683, |
| "losses/dpo": 0.30282700061798096, |
| "losses/sft": 2.068610906600952, |
| "losses/total": 0.30282700061798096, |
| "ref_logps/chosen": -36.281883239746094, |
| "ref_logps/rejected": -44.1247444152832, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -1.2647581100463867, |
| "rewards/margins": 1.3532286882400513, |
| "rewards/rejected": -2.6179869174957275, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.27, |
| "grad_norm": 9.42088429106351, |
| "learning_rate": 1.334269662921348e-07, |
| "logps/chosen": -53.99761199951172, |
| "logps/rejected": -68.16130828857422, |
| "loss": 0.4154, |
| "losses/dpo": 0.7334519028663635, |
| "losses/sft": 2.3190560340881348, |
| "losses/total": 0.7334519028663635, |
| "ref_logps/chosen": -41.94767379760742, |
| "ref_logps/rejected": -43.898292541503906, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -1.2049940824508667, |
| "rewards/margins": 1.2213077545166016, |
| "rewards/rejected": -2.4263014793395996, |
| "step": 301 |
| }, |
| { |
| "epoch": 2.28, |
| "grad_norm": 8.474867745213691, |
| "learning_rate": 1.3202247191011234e-07, |
| "logps/chosen": -56.917606353759766, |
| "logps/rejected": -78.60893249511719, |
| "loss": 0.3643, |
| "losses/dpo": 0.30533695220947266, |
| "losses/sft": 2.2758193016052246, |
| "losses/total": 0.30533695220947266, |
| "ref_logps/chosen": -43.154685974121094, |
| "ref_logps/rejected": -50.671146392822266, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -1.3762919902801514, |
| "rewards/margins": 1.4174861907958984, |
| "rewards/rejected": -2.793778419494629, |
| "step": 302 |
| }, |
| { |
| "epoch": 2.29, |
| "grad_norm": 9.098388851491974, |
| "learning_rate": 1.306179775280899e-07, |
| "logps/chosen": -52.541847229003906, |
| "logps/rejected": -75.27273559570312, |
| "loss": 0.4022, |
| "losses/dpo": 0.7690958380699158, |
| "losses/sft": 2.406214714050293, |
| "losses/total": 0.7690958380699158, |
| "ref_logps/chosen": -39.33592224121094, |
| "ref_logps/rejected": -50.239105224609375, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -1.3205927610397339, |
| "rewards/margins": 1.1827703714370728, |
| "rewards/rejected": -2.5033628940582275, |
| "step": 303 |
| }, |
| { |
| "epoch": 2.29, |
| "grad_norm": 7.915112908663982, |
| "learning_rate": 1.2921348314606743e-07, |
| "logps/chosen": -52.54154968261719, |
| "logps/rejected": -79.33431243896484, |
| "loss": 0.3, |
| "losses/dpo": 0.22580446302890778, |
| "losses/sft": 2.1299729347229004, |
| "losses/total": 0.22580446302890778, |
| "ref_logps/chosen": -39.62370300292969, |
| "ref_logps/rejected": -51.30101013183594, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -1.2917848825454712, |
| "rewards/margins": 1.511545181274414, |
| "rewards/rejected": -2.8033299446105957, |
| "step": 304 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 8.60697942251823, |
| "learning_rate": 1.2780898876404493e-07, |
| "logps/chosen": -54.95063781738281, |
| "logps/rejected": -76.17906188964844, |
| "loss": 0.3787, |
| "losses/dpo": 0.48840370774269104, |
| "losses/sft": 2.068924903869629, |
| "losses/total": 0.48840370774269104, |
| "ref_logps/chosen": -40.245113372802734, |
| "ref_logps/rejected": -48.299339294433594, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -1.4705531597137451, |
| "rewards/margins": 1.317419409751892, |
| "rewards/rejected": -2.7879724502563477, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.31, |
| "grad_norm": 8.916108799954989, |
| "learning_rate": 1.2640449438202246e-07, |
| "logps/chosen": -54.08748245239258, |
| "logps/rejected": -72.3311767578125, |
| "loss": 0.384, |
| "losses/dpo": 0.27609461545944214, |
| "losses/sft": 2.02748703956604, |
| "losses/total": 0.27609461545944214, |
| "ref_logps/chosen": -40.30693817138672, |
| "ref_logps/rejected": -46.038856506347656, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -1.3780547380447388, |
| "rewards/margins": 1.2511768341064453, |
| "rewards/rejected": -2.6292316913604736, |
| "step": 306 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 9.08799197478477, |
| "learning_rate": 1.25e-07, |
| "logps/chosen": -54.119285583496094, |
| "logps/rejected": -65.3755111694336, |
| "loss": 0.4074, |
| "losses/dpo": 0.314796507358551, |
| "losses/sft": 2.275911808013916, |
| "losses/total": 0.314796507358551, |
| "ref_logps/chosen": -40.99217224121094, |
| "ref_logps/rejected": -41.43158721923828, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.312711477279663, |
| "rewards/margins": 1.081681251525879, |
| "rewards/rejected": -2.394392967224121, |
| "step": 307 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 8.103580469459054, |
| "learning_rate": 1.2359550561797752e-07, |
| "logps/chosen": -51.23326873779297, |
| "logps/rejected": -75.66114807128906, |
| "loss": 0.3713, |
| "losses/dpo": 0.387349396944046, |
| "losses/sft": 2.3062949180603027, |
| "losses/total": 0.387349396944046, |
| "ref_logps/chosen": -36.80121994018555, |
| "ref_logps/rejected": -47.23471450805664, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -1.4432051181793213, |
| "rewards/margins": 1.399438738822937, |
| "rewards/rejected": -2.842643976211548, |
| "step": 308 |
| }, |
| { |
| "epoch": 2.33, |
| "grad_norm": 8.263469946945373, |
| "learning_rate": 1.2219101123595506e-07, |
| "logps/chosen": -51.04032897949219, |
| "logps/rejected": -73.06275177001953, |
| "loss": 0.3571, |
| "losses/dpo": 0.4852275252342224, |
| "losses/sft": 2.0593721866607666, |
| "losses/total": 0.4852275252342224, |
| "ref_logps/chosen": -37.46986770629883, |
| "ref_logps/rejected": -45.448211669921875, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -1.3570460081100464, |
| "rewards/margins": 1.4044082164764404, |
| "rewards/rejected": -2.7614541053771973, |
| "step": 309 |
| }, |
| { |
| "epoch": 2.34, |
| "grad_norm": 9.638610818670363, |
| "learning_rate": 1.2078651685393259e-07, |
| "logps/chosen": -61.78599166870117, |
| "logps/rejected": -79.4295883178711, |
| "loss": 0.3814, |
| "losses/dpo": 0.3531866669654846, |
| "losses/sft": 2.5623600482940674, |
| "losses/total": 0.3531866669654846, |
| "ref_logps/chosen": -47.63096618652344, |
| "ref_logps/rejected": -52.177154541015625, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -1.4155021905899048, |
| "rewards/margins": 1.3097403049468994, |
| "rewards/rejected": -2.7252423763275146, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.35, |
| "grad_norm": 8.100555853256143, |
| "learning_rate": 1.1938202247191012e-07, |
| "logps/chosen": -51.98362350463867, |
| "logps/rejected": -76.51303100585938, |
| "loss": 0.3451, |
| "losses/dpo": 0.25824400782585144, |
| "losses/sft": 1.8786492347717285, |
| "losses/total": 0.25824400782585144, |
| "ref_logps/chosen": -38.72646713256836, |
| "ref_logps/rejected": -48.84027862548828, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -1.3257157802581787, |
| "rewards/margins": 1.4415602684020996, |
| "rewards/rejected": -2.767275810241699, |
| "step": 311 |
| }, |
| { |
| "epoch": 2.35, |
| "grad_norm": 8.895756271806501, |
| "learning_rate": 1.1797752808988763e-07, |
| "logps/chosen": -54.543087005615234, |
| "logps/rejected": -75.70824432373047, |
| "loss": 0.3937, |
| "losses/dpo": 0.44249895215034485, |
| "losses/sft": 2.0751869678497314, |
| "losses/total": 0.44249895215034485, |
| "ref_logps/chosen": -40.311073303222656, |
| "ref_logps/rejected": -47.72747802734375, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -1.423201322555542, |
| "rewards/margins": 1.3748749494552612, |
| "rewards/rejected": -2.7980761528015137, |
| "step": 312 |
| }, |
| { |
| "epoch": 2.36, |
| "grad_norm": 9.181986369222448, |
| "learning_rate": 1.1657303370786515e-07, |
| "logps/chosen": -53.058631896972656, |
| "logps/rejected": -76.43999481201172, |
| "loss": 0.3426, |
| "losses/dpo": 0.37313902378082275, |
| "losses/sft": 1.9281624555587769, |
| "losses/total": 0.37313902378082275, |
| "ref_logps/chosen": -38.72821044921875, |
| "ref_logps/rejected": -47.35258483886719, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -1.433042049407959, |
| "rewards/margins": 1.4756982326507568, |
| "rewards/rejected": -2.908740282058716, |
| "step": 313 |
| }, |
| { |
| "epoch": 2.37, |
| "grad_norm": 10.667806756150233, |
| "learning_rate": 1.151685393258427e-07, |
| "logps/chosen": -55.91019058227539, |
| "logps/rejected": -73.06080627441406, |
| "loss": 0.4539, |
| "losses/dpo": 0.27354708313941956, |
| "losses/sft": 2.2720413208007812, |
| "losses/total": 0.27354708313941956, |
| "ref_logps/chosen": -40.70777893066406, |
| "ref_logps/rejected": -46.47440719604492, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -1.520241141319275, |
| "rewards/margins": 1.138399600982666, |
| "rewards/rejected": -2.6586403846740723, |
| "step": 314 |
| }, |
| { |
| "epoch": 2.38, |
| "grad_norm": 9.237157441443594, |
| "learning_rate": 1.1376404494382023e-07, |
| "logps/chosen": -52.99339294433594, |
| "logps/rejected": -77.64442443847656, |
| "loss": 0.413, |
| "losses/dpo": 0.5332150459289551, |
| "losses/sft": 2.2471044063568115, |
| "losses/total": 0.5332150459289551, |
| "ref_logps/chosen": -38.779422760009766, |
| "ref_logps/rejected": -49.12788772583008, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -1.4213968515396118, |
| "rewards/margins": 1.4302568435668945, |
| "rewards/rejected": -2.851653814315796, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.38, |
| "grad_norm": 10.26818606773468, |
| "learning_rate": 1.1235955056179774e-07, |
| "logps/chosen": -54.45465087890625, |
| "logps/rejected": -77.27416229248047, |
| "loss": 0.3916, |
| "losses/dpo": 0.3256514072418213, |
| "losses/sft": 2.224207878112793, |
| "losses/total": 0.3256514072418213, |
| "ref_logps/chosen": -39.11296463012695, |
| "ref_logps/rejected": -47.37888717651367, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -1.5341691970825195, |
| "rewards/margins": 1.4553582668304443, |
| "rewards/rejected": -2.9895272254943848, |
| "step": 316 |
| }, |
| { |
| "epoch": 2.39, |
| "grad_norm": 8.908115110424331, |
| "learning_rate": 1.1095505617977527e-07, |
| "logps/chosen": -52.21783447265625, |
| "logps/rejected": -74.33990478515625, |
| "loss": 0.3491, |
| "losses/dpo": 0.4351283013820648, |
| "losses/sft": 2.3193869590759277, |
| "losses/total": 0.4351283013820648, |
| "ref_logps/chosen": -38.05769348144531, |
| "ref_logps/rejected": -46.181705474853516, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -1.4160147905349731, |
| "rewards/margins": 1.3998043537139893, |
| "rewards/rejected": -2.815819263458252, |
| "step": 317 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 9.20983406154459, |
| "learning_rate": 1.095505617977528e-07, |
| "logps/chosen": -51.61767578125, |
| "logps/rejected": -72.1242446899414, |
| "loss": 0.4144, |
| "losses/dpo": 0.21413980424404144, |
| "losses/sft": 1.885907530784607, |
| "losses/total": 0.21413980424404144, |
| "ref_logps/chosen": -37.064613342285156, |
| "ref_logps/rejected": -44.01332473754883, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -1.4553061723709106, |
| "rewards/margins": 1.355785608291626, |
| "rewards/rejected": -2.811091899871826, |
| "step": 318 |
| }, |
| { |
| "epoch": 2.41, |
| "grad_norm": 9.683467779206334, |
| "learning_rate": 1.0814606741573033e-07, |
| "logps/chosen": -52.75745391845703, |
| "logps/rejected": -68.34822082519531, |
| "loss": 0.4051, |
| "losses/dpo": 0.5242694020271301, |
| "losses/sft": 1.8490060567855835, |
| "losses/total": 0.5242694020271301, |
| "ref_logps/chosen": -38.73335266113281, |
| "ref_logps/rejected": -42.334041595458984, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -1.4024099111557007, |
| "rewards/margins": 1.1990087032318115, |
| "rewards/rejected": -2.6014187335968018, |
| "step": 319 |
| }, |
| { |
| "epoch": 2.42, |
| "grad_norm": 10.094879725137059, |
| "learning_rate": 1.0674157303370785e-07, |
| "logps/chosen": -55.7940788269043, |
| "logps/rejected": -71.7145767211914, |
| "loss": 0.4083, |
| "losses/dpo": 0.33178359270095825, |
| "losses/sft": 2.2600364685058594, |
| "losses/total": 0.33178359270095825, |
| "ref_logps/chosen": -41.903411865234375, |
| "ref_logps/rejected": -45.71622848510742, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -1.389066457748413, |
| "rewards/margins": 1.2107690572738647, |
| "rewards/rejected": -2.5998356342315674, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.42, |
| "grad_norm": 8.737981073393495, |
| "learning_rate": 1.0533707865168538e-07, |
| "logps/chosen": -52.43000793457031, |
| "logps/rejected": -70.14034271240234, |
| "loss": 0.4104, |
| "losses/dpo": 0.4298698902130127, |
| "losses/sft": 1.832787036895752, |
| "losses/total": 0.4298698902130127, |
| "ref_logps/chosen": -38.05841064453125, |
| "ref_logps/rejected": -43.76458740234375, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -1.4371598958969116, |
| "rewards/margins": 1.2004159688949585, |
| "rewards/rejected": -2.63757586479187, |
| "step": 321 |
| }, |
| { |
| "epoch": 2.43, |
| "grad_norm": 8.489155654631809, |
| "learning_rate": 1.0393258426966293e-07, |
| "logps/chosen": -54.25529479980469, |
| "logps/rejected": -76.93711853027344, |
| "loss": 0.3281, |
| "losses/dpo": 0.2001137137413025, |
| "losses/sft": 1.605088710784912, |
| "losses/total": 0.2001137137413025, |
| "ref_logps/chosen": -41.82042694091797, |
| "ref_logps/rejected": -49.489280700683594, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -1.2434866428375244, |
| "rewards/margins": 1.5012969970703125, |
| "rewards/rejected": -2.744783401489258, |
| "step": 322 |
| }, |
| { |
| "epoch": 2.44, |
| "grad_norm": 8.976932361180113, |
| "learning_rate": 1.0252808988764044e-07, |
| "logps/chosen": -51.483985900878906, |
| "logps/rejected": -75.30467224121094, |
| "loss": 0.3253, |
| "losses/dpo": 0.28753212094306946, |
| "losses/sft": 2.173304557800293, |
| "losses/total": 0.28753212094306946, |
| "ref_logps/chosen": -38.227813720703125, |
| "ref_logps/rejected": -47.102657318115234, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -1.3256170749664307, |
| "rewards/margins": 1.4945844411849976, |
| "rewards/rejected": -2.8202013969421387, |
| "step": 323 |
| }, |
| { |
| "epoch": 2.45, |
| "grad_norm": 7.593474701084862, |
| "learning_rate": 1.0112359550561797e-07, |
| "logps/chosen": -47.74129867553711, |
| "logps/rejected": -69.7999267578125, |
| "loss": 0.3669, |
| "losses/dpo": 0.43520650267601013, |
| "losses/sft": 1.8445793390274048, |
| "losses/total": 0.43520650267601013, |
| "ref_logps/chosen": -35.14937210083008, |
| "ref_logps/rejected": -43.280242919921875, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -1.259192943572998, |
| "rewards/margins": 1.3927757740020752, |
| "rewards/rejected": -2.6519687175750732, |
| "step": 324 |
| }, |
| { |
| "epoch": 2.45, |
| "grad_norm": 9.113140126947922, |
| "learning_rate": 9.971910112359549e-08, |
| "logps/chosen": -51.48912811279297, |
| "logps/rejected": -75.93363189697266, |
| "loss": 0.3625, |
| "losses/dpo": 0.271272748708725, |
| "losses/sft": 2.1029720306396484, |
| "losses/total": 0.271272748708725, |
| "ref_logps/chosen": -38.47105026245117, |
| "ref_logps/rejected": -48.987518310546875, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -1.3018079996109009, |
| "rewards/margins": 1.3928041458129883, |
| "rewards/rejected": -2.6946120262145996, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.46, |
| "grad_norm": 9.203607647069793, |
| "learning_rate": 9.831460674157303e-08, |
| "logps/chosen": -56.46796417236328, |
| "logps/rejected": -72.37566375732422, |
| "loss": 0.3758, |
| "losses/dpo": 0.27879202365875244, |
| "losses/sft": 1.8894522190093994, |
| "losses/total": 0.27879202365875244, |
| "ref_logps/chosen": -43.5599365234375, |
| "ref_logps/rejected": -46.51646423339844, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -1.2908027172088623, |
| "rewards/margins": 1.295116901397705, |
| "rewards/rejected": -2.5859196186065674, |
| "step": 326 |
| }, |
| { |
| "epoch": 2.47, |
| "grad_norm": 8.9943305703751, |
| "learning_rate": 9.691011235955055e-08, |
| "logps/chosen": -55.962684631347656, |
| "logps/rejected": -77.36865997314453, |
| "loss": 0.383, |
| "losses/dpo": 0.2792072296142578, |
| "losses/sft": 1.8898770809173584, |
| "losses/total": 0.2792072296142578, |
| "ref_logps/chosen": -41.8868522644043, |
| "ref_logps/rejected": -48.986183166503906, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -1.4075829982757568, |
| "rewards/margins": 1.430665373802185, |
| "rewards/rejected": -2.8382484912872314, |
| "step": 327 |
| }, |
| { |
| "epoch": 2.48, |
| "grad_norm": 9.502289617698672, |
| "learning_rate": 9.550561797752808e-08, |
| "logps/chosen": -50.20685577392578, |
| "logps/rejected": -66.51139831542969, |
| "loss": 0.441, |
| "losses/dpo": 0.2940795123577118, |
| "losses/sft": 2.298060894012451, |
| "losses/total": 0.2940795123577118, |
| "ref_logps/chosen": -37.659244537353516, |
| "ref_logps/rejected": -42.670841217041016, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -1.254760980606079, |
| "rewards/margins": 1.129294514656067, |
| "rewards/rejected": -2.3840553760528564, |
| "step": 328 |
| }, |
| { |
| "epoch": 2.48, |
| "grad_norm": 9.657263439264016, |
| "learning_rate": 9.410112359550561e-08, |
| "logps/chosen": -54.32592010498047, |
| "logps/rejected": -69.8943862915039, |
| "loss": 0.4163, |
| "losses/dpo": 0.6257603764533997, |
| "losses/sft": 2.595241069793701, |
| "losses/total": 0.6257603764533997, |
| "ref_logps/chosen": -40.53725814819336, |
| "ref_logps/rejected": -43.76425552368164, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -1.37886643409729, |
| "rewards/margins": 1.2341458797454834, |
| "rewards/rejected": -2.6130123138427734, |
| "step": 329 |
| }, |
| { |
| "epoch": 2.49, |
| "grad_norm": 9.274122078417514, |
| "learning_rate": 9.269662921348314e-08, |
| "logps/chosen": -54.48114013671875, |
| "logps/rejected": -74.62823486328125, |
| "loss": 0.3801, |
| "losses/dpo": 0.46192625164985657, |
| "losses/sft": 2.048821449279785, |
| "losses/total": 0.46192625164985657, |
| "ref_logps/chosen": -40.628318786621094, |
| "ref_logps/rejected": -47.394065856933594, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.385282278060913, |
| "rewards/margins": 1.3381340503692627, |
| "rewards/rejected": -2.7234160900115967, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 11.530277636718996, |
| "learning_rate": 9.129213483146067e-08, |
| "logps/chosen": -51.81992721557617, |
| "logps/rejected": -78.34001922607422, |
| "loss": 0.3378, |
| "losses/dpo": 0.36353716254234314, |
| "losses/sft": 2.3431830406188965, |
| "losses/total": 0.36353716254234314, |
| "ref_logps/chosen": -38.79859161376953, |
| "ref_logps/rejected": -50.49066925048828, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.302133560180664, |
| "rewards/margins": 1.4828013181686401, |
| "rewards/rejected": -2.7849345207214355, |
| "step": 331 |
| }, |
| { |
| "epoch": 2.51, |
| "grad_norm": 9.663796087127917, |
| "learning_rate": 8.988764044943819e-08, |
| "logps/chosen": -56.09492492675781, |
| "logps/rejected": -77.04325866699219, |
| "loss": 0.3814, |
| "losses/dpo": 0.25816428661346436, |
| "losses/sft": 2.8091163635253906, |
| "losses/total": 0.25816428661346436, |
| "ref_logps/chosen": -40.704925537109375, |
| "ref_logps/rejected": -47.98851013183594, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -1.5390002727508545, |
| "rewards/margins": 1.3664746284484863, |
| "rewards/rejected": -2.90547513961792, |
| "step": 332 |
| }, |
| { |
| "epoch": 2.51, |
| "grad_norm": 9.817894775320998, |
| "learning_rate": 8.848314606741572e-08, |
| "logps/chosen": -53.74916076660156, |
| "logps/rejected": -71.23921966552734, |
| "loss": 0.4129, |
| "losses/dpo": 0.5282669067382812, |
| "losses/sft": 2.027956962585449, |
| "losses/total": 0.5282669067382812, |
| "ref_logps/chosen": -40.569034576416016, |
| "ref_logps/rejected": -45.03144073486328, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -1.3180131912231445, |
| "rewards/margins": 1.302764654159546, |
| "rewards/rejected": -2.6207778453826904, |
| "step": 333 |
| }, |
| { |
| "epoch": 2.52, |
| "grad_norm": 9.915518749588111, |
| "learning_rate": 8.707865168539325e-08, |
| "logps/chosen": -53.48023986816406, |
| "logps/rejected": -72.4287109375, |
| "loss": 0.4231, |
| "losses/dpo": 0.4929217994213104, |
| "losses/sft": 2.577164888381958, |
| "losses/total": 0.4929217994213104, |
| "ref_logps/chosen": -39.79558563232422, |
| "ref_logps/rejected": -45.82670593261719, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -1.3684654235839844, |
| "rewards/margins": 1.291735291481018, |
| "rewards/rejected": -2.660200595855713, |
| "step": 334 |
| }, |
| { |
| "epoch": 2.53, |
| "grad_norm": 9.335165389726255, |
| "learning_rate": 8.567415730337078e-08, |
| "logps/chosen": -52.41722106933594, |
| "logps/rejected": -71.85494995117188, |
| "loss": 0.3694, |
| "losses/dpo": 0.317619651556015, |
| "losses/sft": 2.0792832374572754, |
| "losses/total": 0.317619651556015, |
| "ref_logps/chosen": -39.57048416137695, |
| "ref_logps/rejected": -46.20240783691406, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -1.2846734523773193, |
| "rewards/margins": 1.280580997467041, |
| "rewards/rejected": -2.5652544498443604, |
| "step": 335 |
| }, |
| { |
| "epoch": 2.54, |
| "grad_norm": 9.185058383312379, |
| "learning_rate": 8.426966292134831e-08, |
| "logps/chosen": -56.19029235839844, |
| "logps/rejected": -80.49638366699219, |
| "loss": 0.3411, |
| "losses/dpo": 0.2321043312549591, |
| "losses/sft": 1.5318742990493774, |
| "losses/total": 0.2321043312549591, |
| "ref_logps/chosen": -41.53580856323242, |
| "ref_logps/rejected": -50.73744201660156, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -1.4654479026794434, |
| "rewards/margins": 1.5104467868804932, |
| "rewards/rejected": -2.9758946895599365, |
| "step": 336 |
| }, |
| { |
| "epoch": 2.54, |
| "grad_norm": 8.541745993770446, |
| "learning_rate": 8.286516853932583e-08, |
| "logps/chosen": -49.73480987548828, |
| "logps/rejected": -73.88976287841797, |
| "loss": 0.3512, |
| "losses/dpo": 0.2616669237613678, |
| "losses/sft": 1.7109529972076416, |
| "losses/total": 0.2616669237613678, |
| "ref_logps/chosen": -37.30692672729492, |
| "ref_logps/rejected": -46.78838348388672, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -1.2427881956100464, |
| "rewards/margins": 1.4673501253128052, |
| "rewards/rejected": -2.7101383209228516, |
| "step": 337 |
| }, |
| { |
| "epoch": 2.55, |
| "grad_norm": 10.42078485613911, |
| "learning_rate": 8.146067415730337e-08, |
| "logps/chosen": -52.26924514770508, |
| "logps/rejected": -67.19551086425781, |
| "loss": 0.4575, |
| "losses/dpo": 0.4895854890346527, |
| "losses/sft": 2.276334762573242, |
| "losses/total": 0.4895854890346527, |
| "ref_logps/chosen": -39.17838668823242, |
| "ref_logps/rejected": -43.06511688232422, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -1.3090859651565552, |
| "rewards/margins": 1.1039537191390991, |
| "rewards/rejected": -2.4130399227142334, |
| "step": 338 |
| }, |
| { |
| "epoch": 2.56, |
| "grad_norm": 10.673884752576804, |
| "learning_rate": 8.005617977528089e-08, |
| "logps/chosen": -54.1285514831543, |
| "logps/rejected": -66.32559967041016, |
| "loss": 0.4616, |
| "losses/dpo": 0.3069703280925751, |
| "losses/sft": 1.7043497562408447, |
| "losses/total": 0.3069703280925751, |
| "ref_logps/chosen": -39.07845687866211, |
| "ref_logps/rejected": -40.795433044433594, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -1.5050091743469238, |
| "rewards/margins": 1.0480072498321533, |
| "rewards/rejected": -2.5530166625976562, |
| "step": 339 |
| }, |
| { |
| "epoch": 2.57, |
| "grad_norm": 7.734430324176608, |
| "learning_rate": 7.865168539325842e-08, |
| "logps/chosen": -51.84765625, |
| "logps/rejected": -73.76638793945312, |
| "loss": 0.343, |
| "losses/dpo": 0.21601220965385437, |
| "losses/sft": 1.731180191040039, |
| "losses/total": 0.21601220965385437, |
| "ref_logps/chosen": -38.94496154785156, |
| "ref_logps/rejected": -47.23394012451172, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -1.2902700901031494, |
| "rewards/margins": 1.3629752397537231, |
| "rewards/rejected": -2.653244972229004, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.57, |
| "grad_norm": 9.718523923824968, |
| "learning_rate": 7.724719101123594e-08, |
| "logps/chosen": -54.3284797668457, |
| "logps/rejected": -74.42861938476562, |
| "loss": 0.4342, |
| "losses/dpo": 0.5424623489379883, |
| "losses/sft": 2.519442081451416, |
| "losses/total": 0.5424623489379883, |
| "ref_logps/chosen": -40.32830047607422, |
| "ref_logps/rejected": -47.90203094482422, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.4000180959701538, |
| "rewards/margins": 1.2526406049728394, |
| "rewards/rejected": -2.652658700942993, |
| "step": 341 |
| }, |
| { |
| "epoch": 2.58, |
| "grad_norm": 8.766478003038216, |
| "learning_rate": 7.584269662921348e-08, |
| "logps/chosen": -56.134185791015625, |
| "logps/rejected": -74.1839370727539, |
| "loss": 0.3627, |
| "losses/dpo": 0.2985873520374298, |
| "losses/sft": 2.3777570724487305, |
| "losses/total": 0.2985873520374298, |
| "ref_logps/chosen": -40.96846008300781, |
| "ref_logps/rejected": -45.8743896484375, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -1.5165728330612183, |
| "rewards/margins": 1.3143821954727173, |
| "rewards/rejected": -2.8309547901153564, |
| "step": 342 |
| }, |
| { |
| "epoch": 2.59, |
| "grad_norm": 10.011243908821191, |
| "learning_rate": 7.443820224719101e-08, |
| "logps/chosen": -51.62477493286133, |
| "logps/rejected": -70.33786010742188, |
| "loss": 0.4342, |
| "losses/dpo": 0.337339848279953, |
| "losses/sft": 2.341553211212158, |
| "losses/total": 0.337339848279953, |
| "ref_logps/chosen": -37.967830657958984, |
| "ref_logps/rejected": -45.28611755371094, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.365694522857666, |
| "rewards/margins": 1.1394801139831543, |
| "rewards/rejected": -2.5051746368408203, |
| "step": 343 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 9.59771843166304, |
| "learning_rate": 7.303370786516853e-08, |
| "logps/chosen": -51.54905319213867, |
| "logps/rejected": -71.37974548339844, |
| "loss": 0.4085, |
| "losses/dpo": 0.39244934916496277, |
| "losses/sft": 1.864844799041748, |
| "losses/total": 0.39244934916496277, |
| "ref_logps/chosen": -38.66598129272461, |
| "ref_logps/rejected": -45.7724609375, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -1.2883074283599854, |
| "rewards/margins": 1.272420883178711, |
| "rewards/rejected": -2.5607285499572754, |
| "step": 344 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 8.700784807594031, |
| "learning_rate": 7.162921348314606e-08, |
| "logps/chosen": -56.775753021240234, |
| "logps/rejected": -77.216796875, |
| "loss": 0.3416, |
| "losses/dpo": 0.22181375324726105, |
| "losses/sft": 2.4348971843719482, |
| "losses/total": 0.22181375324726105, |
| "ref_logps/chosen": -42.833614349365234, |
| "ref_logps/rejected": -49.19655227661133, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -1.3942136764526367, |
| "rewards/margins": 1.4078103303909302, |
| "rewards/rejected": -2.8020238876342773, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.61, |
| "grad_norm": 9.30159664736646, |
| "learning_rate": 7.022471910112359e-08, |
| "logps/chosen": -48.35330581665039, |
| "logps/rejected": -68.90961456298828, |
| "loss": 0.4087, |
| "losses/dpo": 0.43862560391426086, |
| "losses/sft": 1.861382007598877, |
| "losses/total": 0.43862560391426086, |
| "ref_logps/chosen": -35.64689254760742, |
| "ref_logps/rejected": -43.799800872802734, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.270641803741455, |
| "rewards/margins": 1.240339756011963, |
| "rewards/rejected": -2.510981559753418, |
| "step": 346 |
| }, |
| { |
| "epoch": 2.62, |
| "grad_norm": 9.466845106547478, |
| "learning_rate": 6.882022471910112e-08, |
| "logps/chosen": -52.57171630859375, |
| "logps/rejected": -66.74671173095703, |
| "loss": 0.3995, |
| "losses/dpo": 0.5215581655502319, |
| "losses/sft": 2.0002975463867188, |
| "losses/total": 0.5215581655502319, |
| "ref_logps/chosen": -39.363014221191406, |
| "ref_logps/rejected": -41.616329193115234, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -1.3208706378936768, |
| "rewards/margins": 1.1921679973602295, |
| "rewards/rejected": -2.5130386352539062, |
| "step": 347 |
| }, |
| { |
| "epoch": 2.63, |
| "grad_norm": 8.897974125441753, |
| "learning_rate": 6.741573033707864e-08, |
| "logps/chosen": -54.96879577636719, |
| "logps/rejected": -71.28080749511719, |
| "loss": 0.4172, |
| "losses/dpo": 0.6290773749351501, |
| "losses/sft": 2.65497088432312, |
| "losses/total": 0.6290773749351501, |
| "ref_logps/chosen": -41.37034225463867, |
| "ref_logps/rejected": -45.2210578918457, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -1.3598453998565674, |
| "rewards/margins": 1.2461297512054443, |
| "rewards/rejected": -2.605975389480591, |
| "step": 348 |
| }, |
| { |
| "epoch": 2.63, |
| "grad_norm": 8.450179202589874, |
| "learning_rate": 6.601123595505617e-08, |
| "logps/chosen": -56.42803192138672, |
| "logps/rejected": -78.08199310302734, |
| "loss": 0.3279, |
| "losses/dpo": 0.2672095000743866, |
| "losses/sft": 1.7004587650299072, |
| "losses/total": 0.2672095000743866, |
| "ref_logps/chosen": -43.05862808227539, |
| "ref_logps/rejected": -49.85722732543945, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -1.3369402885437012, |
| "rewards/margins": 1.4855366945266724, |
| "rewards/rejected": -2.822477102279663, |
| "step": 349 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 9.946490252779716, |
| "learning_rate": 6.460674157303371e-08, |
| "logps/chosen": -52.30256652832031, |
| "logps/rejected": -67.33949279785156, |
| "loss": 0.4179, |
| "losses/dpo": 0.23541654646396637, |
| "losses/sft": 1.6304823160171509, |
| "losses/total": 0.23541654646396637, |
| "ref_logps/chosen": -39.795745849609375, |
| "ref_logps/rejected": -43.05610275268555, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -1.2506815195083618, |
| "rewards/margins": 1.177657961845398, |
| "rewards/rejected": -2.4283392429351807, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.65, |
| "grad_norm": 8.420756732795637, |
| "learning_rate": 6.320224719101123e-08, |
| "logps/chosen": -50.62635040283203, |
| "logps/rejected": -71.55708312988281, |
| "loss": 0.3839, |
| "losses/dpo": 0.3438160717487335, |
| "losses/sft": 1.8534799814224243, |
| "losses/total": 0.3438160717487335, |
| "ref_logps/chosen": -39.325897216796875, |
| "ref_logps/rejected": -46.86465835571289, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -1.1300455331802368, |
| "rewards/margins": 1.3391977548599243, |
| "rewards/rejected": -2.469243049621582, |
| "step": 351 |
| }, |
| { |
| "epoch": 2.66, |
| "grad_norm": 9.697968850403186, |
| "learning_rate": 6.179775280898876e-08, |
| "logps/chosen": -54.621578216552734, |
| "logps/rejected": -70.27922058105469, |
| "loss": 0.4183, |
| "losses/dpo": 0.35120806097984314, |
| "losses/sft": 2.030266284942627, |
| "losses/total": 0.35120806097984314, |
| "ref_logps/chosen": -41.76460266113281, |
| "ref_logps/rejected": -45.33925247192383, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.2856972217559814, |
| "rewards/margins": 1.2083001136779785, |
| "rewards/rejected": -2.49399733543396, |
| "step": 352 |
| }, |
| { |
| "epoch": 2.66, |
| "grad_norm": 9.120026907057964, |
| "learning_rate": 6.039325842696629e-08, |
| "logps/chosen": -52.02517318725586, |
| "logps/rejected": -74.53661346435547, |
| "loss": 0.409, |
| "losses/dpo": 0.3873208463191986, |
| "losses/sft": 1.7444610595703125, |
| "losses/total": 0.3873208463191986, |
| "ref_logps/chosen": -38.21184539794922, |
| "ref_logps/rejected": -49.15116882324219, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -1.3813323974609375, |
| "rewards/margins": 1.157212495803833, |
| "rewards/rejected": -2.5385448932647705, |
| "step": 353 |
| }, |
| { |
| "epoch": 2.67, |
| "grad_norm": 9.69567118291811, |
| "learning_rate": 5.898876404494382e-08, |
| "logps/chosen": -52.73221969604492, |
| "logps/rejected": -70.13288879394531, |
| "loss": 0.4226, |
| "losses/dpo": 0.3050675392150879, |
| "losses/sft": 1.7437413930892944, |
| "losses/total": 0.3050675392150879, |
| "ref_logps/chosen": -40.23515701293945, |
| "ref_logps/rejected": -45.879547119140625, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -1.2497066259384155, |
| "rewards/margins": 1.1756272315979004, |
| "rewards/rejected": -2.4253337383270264, |
| "step": 354 |
| }, |
| { |
| "epoch": 2.68, |
| "grad_norm": 8.584053557094956, |
| "learning_rate": 5.758426966292135e-08, |
| "logps/chosen": -57.64381408691406, |
| "logps/rejected": -72.0084457397461, |
| "loss": 0.3835, |
| "losses/dpo": 0.40820345282554626, |
| "losses/sft": 2.4096083641052246, |
| "losses/total": 0.40820345282554626, |
| "ref_logps/chosen": -43.7611198425293, |
| "ref_logps/rejected": -46.36372375488281, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -1.3882694244384766, |
| "rewards/margins": 1.176202654838562, |
| "rewards/rejected": -2.564471960067749, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.69, |
| "grad_norm": 8.221246825721817, |
| "learning_rate": 5.617977528089887e-08, |
| "logps/chosen": -46.82723617553711, |
| "logps/rejected": -68.99041748046875, |
| "loss": 0.3539, |
| "losses/dpo": 0.29153013229370117, |
| "losses/sft": 1.427022099494934, |
| "losses/total": 0.29153013229370117, |
| "ref_logps/chosen": -35.502262115478516, |
| "ref_logps/rejected": -43.46721649169922, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -1.132498025894165, |
| "rewards/margins": 1.419821858406067, |
| "rewards/rejected": -2.5523197650909424, |
| "step": 356 |
| }, |
| { |
| "epoch": 2.69, |
| "grad_norm": 9.482797590566683, |
| "learning_rate": 5.47752808988764e-08, |
| "logps/chosen": -51.7399787902832, |
| "logps/rejected": -71.89605712890625, |
| "loss": 0.3979, |
| "losses/dpo": 0.21694956719875336, |
| "losses/sft": 1.9680830240249634, |
| "losses/total": 0.21694956719875336, |
| "ref_logps/chosen": -38.23194122314453, |
| "ref_logps/rejected": -45.57975769042969, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -1.350803256034851, |
| "rewards/margins": 1.2808265686035156, |
| "rewards/rejected": -2.631629705429077, |
| "step": 357 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 8.17039044420366, |
| "learning_rate": 5.3370786516853926e-08, |
| "logps/chosen": -52.407249450683594, |
| "logps/rejected": -71.58457946777344, |
| "loss": 0.3517, |
| "losses/dpo": 0.45670050382614136, |
| "losses/sft": 2.3598852157592773, |
| "losses/total": 0.45670050382614136, |
| "ref_logps/chosen": -40.96891784667969, |
| "ref_logps/rejected": -45.695167541503906, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -1.143832802772522, |
| "rewards/margins": 1.4451087713241577, |
| "rewards/rejected": -2.5889415740966797, |
| "step": 358 |
| }, |
| { |
| "epoch": 2.71, |
| "grad_norm": 7.803291186480779, |
| "learning_rate": 5.196629213483146e-08, |
| "logps/chosen": -47.12774658203125, |
| "logps/rejected": -70.28164672851562, |
| "loss": 0.331, |
| "losses/dpo": 0.3020516037940979, |
| "losses/sft": 1.725950837135315, |
| "losses/total": 0.3020516037940979, |
| "ref_logps/chosen": -36.658851623535156, |
| "ref_logps/rejected": -44.756195068359375, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -1.0468891859054565, |
| "rewards/margins": 1.5056557655334473, |
| "rewards/rejected": -2.5525450706481934, |
| "step": 359 |
| }, |
| { |
| "epoch": 2.72, |
| "grad_norm": 8.941952443820963, |
| "learning_rate": 5.056179775280899e-08, |
| "logps/chosen": -51.66205596923828, |
| "logps/rejected": -69.529296875, |
| "loss": 0.3878, |
| "losses/dpo": 0.4756266474723816, |
| "losses/sft": 1.6768076419830322, |
| "losses/total": 0.4756266474723816, |
| "ref_logps/chosen": -39.167083740234375, |
| "ref_logps/rejected": -45.206932067871094, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -1.2494975328445435, |
| "rewards/margins": 1.1827386617660522, |
| "rewards/rejected": -2.4322359561920166, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.72, |
| "grad_norm": 9.404438765920613, |
| "learning_rate": 4.915730337078652e-08, |
| "logps/chosen": -54.10792922973633, |
| "logps/rejected": -70.66584014892578, |
| "loss": 0.385, |
| "losses/dpo": 0.25079599022865295, |
| "losses/sft": 2.628451108932495, |
| "losses/total": 0.25079599022865295, |
| "ref_logps/chosen": -41.44969177246094, |
| "ref_logps/rejected": -44.49009704589844, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.2658233642578125, |
| "rewards/margins": 1.3517518043518066, |
| "rewards/rejected": -2.6175754070281982, |
| "step": 361 |
| }, |
| { |
| "epoch": 2.73, |
| "grad_norm": 8.339761715598929, |
| "learning_rate": 4.775280898876404e-08, |
| "logps/chosen": -52.00060272216797, |
| "logps/rejected": -70.13935852050781, |
| "loss": 0.3657, |
| "losses/dpo": 0.410220742225647, |
| "losses/sft": 2.064330577850342, |
| "losses/total": 0.410220742225647, |
| "ref_logps/chosen": -38.8874626159668, |
| "ref_logps/rejected": -44.29195785522461, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -1.311313509941101, |
| "rewards/margins": 1.27342689037323, |
| "rewards/rejected": -2.584740400314331, |
| "step": 362 |
| }, |
| { |
| "epoch": 2.74, |
| "grad_norm": 8.695799719443274, |
| "learning_rate": 4.634831460674157e-08, |
| "logps/chosen": -54.19892120361328, |
| "logps/rejected": -70.33839416503906, |
| "loss": 0.3834, |
| "losses/dpo": 0.4246940612792969, |
| "losses/sft": 1.6766891479492188, |
| "losses/total": 0.4246940612792969, |
| "ref_logps/chosen": -40.98695755004883, |
| "ref_logps/rejected": -44.57014846801758, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -1.3211965560913086, |
| "rewards/margins": 1.255626916885376, |
| "rewards/rejected": -2.5768234729766846, |
| "step": 363 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 7.953708445192091, |
| "learning_rate": 4.4943820224719096e-08, |
| "logps/chosen": -51.214752197265625, |
| "logps/rejected": -75.3336181640625, |
| "loss": 0.3295, |
| "losses/dpo": 0.17288488149642944, |
| "losses/sft": 2.220893383026123, |
| "losses/total": 0.17288488149642944, |
| "ref_logps/chosen": -38.46108627319336, |
| "ref_logps/rejected": -48.44053268432617, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -1.2753666639328003, |
| "rewards/margins": 1.4139418601989746, |
| "rewards/rejected": -2.6893081665039062, |
| "step": 364 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 8.821196988330435, |
| "learning_rate": 4.3539325842696626e-08, |
| "logps/chosen": -56.51776123046875, |
| "logps/rejected": -75.40132904052734, |
| "loss": 0.35, |
| "losses/dpo": 0.169864684343338, |
| "losses/sft": 2.520303964614868, |
| "losses/total": 0.169864684343338, |
| "ref_logps/chosen": -43.79001235961914, |
| "ref_logps/rejected": -48.247989654541016, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -1.2727751731872559, |
| "rewards/margins": 1.4425586462020874, |
| "rewards/rejected": -2.715333938598633, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.76, |
| "grad_norm": 9.689618011799487, |
| "learning_rate": 4.213483146067416e-08, |
| "logps/chosen": -57.19207000732422, |
| "logps/rejected": -72.71266174316406, |
| "loss": 0.407, |
| "losses/dpo": 0.27488580346107483, |
| "losses/sft": 1.8573498725891113, |
| "losses/total": 0.27488580346107483, |
| "ref_logps/chosen": -43.24185562133789, |
| "ref_logps/rejected": -46.41039276123047, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -1.3950214385986328, |
| "rewards/margins": 1.2352051734924316, |
| "rewards/rejected": -2.6302266120910645, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.77, |
| "grad_norm": 9.338062839876327, |
| "learning_rate": 4.073033707865169e-08, |
| "logps/chosen": -50.966712951660156, |
| "logps/rejected": -68.7747802734375, |
| "loss": 0.4169, |
| "losses/dpo": 0.3666359782218933, |
| "losses/sft": 2.0023789405822754, |
| "losses/total": 0.3666359782218933, |
| "ref_logps/chosen": -38.069068908691406, |
| "ref_logps/rejected": -44.10002899169922, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -1.2897647619247437, |
| "rewards/margins": 1.1777102947235107, |
| "rewards/rejected": -2.467475175857544, |
| "step": 367 |
| }, |
| { |
| "epoch": 2.78, |
| "grad_norm": 7.949833391744097, |
| "learning_rate": 3.932584269662921e-08, |
| "logps/chosen": -47.72471237182617, |
| "logps/rejected": -70.84202575683594, |
| "loss": 0.3803, |
| "losses/dpo": 0.3086835443973541, |
| "losses/sft": 1.9907643795013428, |
| "losses/total": 0.3086835443973541, |
| "ref_logps/chosen": -35.20145034790039, |
| "ref_logps/rejected": -44.49205780029297, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -1.2523258924484253, |
| "rewards/margins": 1.3826706409454346, |
| "rewards/rejected": -2.6349964141845703, |
| "step": 368 |
| }, |
| { |
| "epoch": 2.78, |
| "grad_norm": 9.235066572868517, |
| "learning_rate": 3.792134831460674e-08, |
| "logps/chosen": -52.30189895629883, |
| "logps/rejected": -70.8028335571289, |
| "loss": 0.3852, |
| "losses/dpo": 0.2891031503677368, |
| "losses/sft": 1.8405730724334717, |
| "losses/total": 0.2891031503677368, |
| "ref_logps/chosen": -39.477725982666016, |
| "ref_logps/rejected": -45.31201934814453, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -1.2824174165725708, |
| "rewards/margins": 1.266663908958435, |
| "rewards/rejected": -2.549081563949585, |
| "step": 369 |
| }, |
| { |
| "epoch": 2.79, |
| "grad_norm": 9.190353581688715, |
| "learning_rate": 3.6516853932584266e-08, |
| "logps/chosen": -49.18308639526367, |
| "logps/rejected": -67.69566345214844, |
| "loss": 0.4, |
| "losses/dpo": 0.3436277508735657, |
| "losses/sft": 2.0218303203582764, |
| "losses/total": 0.3436277508735657, |
| "ref_logps/chosen": -36.68975830078125, |
| "ref_logps/rejected": -43.71223449707031, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -1.2493327856063843, |
| "rewards/margins": 1.1490094661712646, |
| "rewards/rejected": -2.3983423709869385, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 8.148479571122722, |
| "learning_rate": 3.5112359550561796e-08, |
| "logps/chosen": -51.570220947265625, |
| "logps/rejected": -71.7831039428711, |
| "loss": 0.3307, |
| "losses/dpo": 0.5651198625564575, |
| "losses/sft": 2.007855176925659, |
| "losses/total": 0.5651198625564575, |
| "ref_logps/chosen": -40.369407653808594, |
| "ref_logps/rejected": -46.21443176269531, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.1200807094573975, |
| "rewards/margins": 1.4367868900299072, |
| "rewards/rejected": -2.5568673610687256, |
| "step": 371 |
| }, |
| { |
| "epoch": 2.81, |
| "grad_norm": 9.663237502055296, |
| "learning_rate": 3.370786516853932e-08, |
| "logps/chosen": -55.40485382080078, |
| "logps/rejected": -73.81289672851562, |
| "loss": 0.4029, |
| "losses/dpo": 0.7689430713653564, |
| "losses/sft": 1.9435756206512451, |
| "losses/total": 0.7689430713653564, |
| "ref_logps/chosen": -42.52079772949219, |
| "ref_logps/rejected": -47.747249603271484, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.2884055376052856, |
| "rewards/margins": 1.3181602954864502, |
| "rewards/rejected": -2.6065659523010254, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.82, |
| "grad_norm": 8.41997319865235, |
| "learning_rate": 3.230337078651686e-08, |
| "logps/chosen": -55.254249572753906, |
| "logps/rejected": -81.1642074584961, |
| "loss": 0.3146, |
| "losses/dpo": 0.3366415500640869, |
| "losses/sft": 1.8378387689590454, |
| "losses/total": 0.3366415500640869, |
| "ref_logps/chosen": -41.7273063659668, |
| "ref_logps/rejected": -51.86448669433594, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -1.3526947498321533, |
| "rewards/margins": 1.577277421951294, |
| "rewards/rejected": -2.929971933364868, |
| "step": 373 |
| }, |
| { |
| "epoch": 2.82, |
| "grad_norm": 9.08048419718233, |
| "learning_rate": 3.089887640449438e-08, |
| "logps/chosen": -52.86750793457031, |
| "logps/rejected": -77.41828918457031, |
| "loss": 0.3294, |
| "losses/dpo": 0.23508216440677643, |
| "losses/sft": 1.5872150659561157, |
| "losses/total": 0.23508216440677643, |
| "ref_logps/chosen": -39.141883850097656, |
| "ref_logps/rejected": -49.06714630126953, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.3725626468658447, |
| "rewards/margins": 1.4625511169433594, |
| "rewards/rejected": -2.835113763809204, |
| "step": 374 |
| }, |
| { |
| "epoch": 2.83, |
| "grad_norm": 9.149313086592246, |
| "learning_rate": 2.949438202247191e-08, |
| "logps/chosen": -50.02390670776367, |
| "logps/rejected": -75.56754302978516, |
| "loss": 0.362, |
| "losses/dpo": 0.6652272939682007, |
| "losses/sft": 2.926239252090454, |
| "losses/total": 0.6652272939682007, |
| "ref_logps/chosen": -37.26597595214844, |
| "ref_logps/rejected": -48.15172576904297, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.2757928371429443, |
| "rewards/margins": 1.4657888412475586, |
| "rewards/rejected": -2.741581916809082, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.84, |
| "grad_norm": 9.114150173411574, |
| "learning_rate": 2.8089887640449436e-08, |
| "logps/chosen": -55.91659164428711, |
| "logps/rejected": -74.04378509521484, |
| "loss": 0.3668, |
| "losses/dpo": 0.2893008589744568, |
| "losses/sft": 2.1376471519470215, |
| "losses/total": 0.2893008589744568, |
| "ref_logps/chosen": -42.005611419677734, |
| "ref_logps/rejected": -46.799495697021484, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -1.3910987377166748, |
| "rewards/margins": 1.333329677581787, |
| "rewards/rejected": -2.724428415298462, |
| "step": 376 |
| }, |
| { |
| "epoch": 2.85, |
| "grad_norm": 10.02587161317734, |
| "learning_rate": 2.6685393258426963e-08, |
| "logps/chosen": -53.21587371826172, |
| "logps/rejected": -71.60870361328125, |
| "loss": 0.4256, |
| "losses/dpo": 0.7751315236091614, |
| "losses/sft": 2.11029314994812, |
| "losses/total": 0.7751315236091614, |
| "ref_logps/chosen": -40.663108825683594, |
| "ref_logps/rejected": -46.480648040771484, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -1.2552767992019653, |
| "rewards/margins": 1.25752854347229, |
| "rewards/rejected": -2.512805700302124, |
| "step": 377 |
| }, |
| { |
| "epoch": 2.85, |
| "grad_norm": 8.810937586248796, |
| "learning_rate": 2.5280898876404493e-08, |
| "logps/chosen": -52.51762008666992, |
| "logps/rejected": -77.57078552246094, |
| "loss": 0.3381, |
| "losses/dpo": 0.28002500534057617, |
| "losses/sft": 1.5633399486541748, |
| "losses/total": 0.28002500534057617, |
| "ref_logps/chosen": -39.36627960205078, |
| "ref_logps/rejected": -50.626708984375, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.315134048461914, |
| "rewards/margins": 1.3792742490768433, |
| "rewards/rejected": -2.6944081783294678, |
| "step": 378 |
| }, |
| { |
| "epoch": 2.86, |
| "grad_norm": 9.293485858540686, |
| "learning_rate": 2.387640449438202e-08, |
| "logps/chosen": -51.378074645996094, |
| "logps/rejected": -64.74043273925781, |
| "loss": 0.4262, |
| "losses/dpo": 0.7660055756568909, |
| "losses/sft": 2.2007508277893066, |
| "losses/total": 0.7660055756568909, |
| "ref_logps/chosen": -39.29324722290039, |
| "ref_logps/rejected": -41.33624267578125, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -1.2084828615188599, |
| "rewards/margins": 1.131935954093933, |
| "rewards/rejected": -2.340418815612793, |
| "step": 379 |
| }, |
| { |
| "epoch": 2.87, |
| "grad_norm": 8.825439696536032, |
| "learning_rate": 2.2471910112359548e-08, |
| "logps/chosen": -55.97784423828125, |
| "logps/rejected": -78.24275207519531, |
| "loss": 0.345, |
| "losses/dpo": 0.33630573749542236, |
| "losses/sft": 2.7268307209014893, |
| "losses/total": 0.33630573749542236, |
| "ref_logps/chosen": -41.14834976196289, |
| "ref_logps/rejected": -49.048004150390625, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -1.4829493761062622, |
| "rewards/margins": 1.4365259408950806, |
| "rewards/rejected": -2.9194750785827637, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.88, |
| "grad_norm": 8.683259186904719, |
| "learning_rate": 2.106741573033708e-08, |
| "logps/chosen": -52.257469177246094, |
| "logps/rejected": -67.984130859375, |
| "loss": 0.4087, |
| "losses/dpo": 0.3859608471393585, |
| "losses/sft": 1.8246614933013916, |
| "losses/total": 0.3859608471393585, |
| "ref_logps/chosen": -38.84019470214844, |
| "ref_logps/rejected": -43.60353088378906, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -1.341727375984192, |
| "rewards/margins": 1.0963327884674072, |
| "rewards/rejected": -2.4380600452423096, |
| "step": 381 |
| }, |
| { |
| "epoch": 2.88, |
| "grad_norm": 8.270857768884154, |
| "learning_rate": 1.9662921348314606e-08, |
| "logps/chosen": -54.033363342285156, |
| "logps/rejected": -77.31697082519531, |
| "loss": 0.3287, |
| "losses/dpo": 0.19281096756458282, |
| "losses/sft": 1.8291985988616943, |
| "losses/total": 0.19281096756458282, |
| "ref_logps/chosen": -39.56074523925781, |
| "ref_logps/rejected": -47.55027770996094, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -1.4472615718841553, |
| "rewards/margins": 1.5294086933135986, |
| "rewards/rejected": -2.9766697883605957, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.89, |
| "grad_norm": 9.68267538629506, |
| "learning_rate": 1.8258426966292133e-08, |
| "logps/chosen": -53.385498046875, |
| "logps/rejected": -68.63336181640625, |
| "loss": 0.391, |
| "losses/dpo": 0.5480431318283081, |
| "losses/sft": 2.3586955070495605, |
| "losses/total": 0.5480431318283081, |
| "ref_logps/chosen": -40.53166198730469, |
| "ref_logps/rejected": -44.218746185302734, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -1.2853829860687256, |
| "rewards/margins": 1.156078815460205, |
| "rewards/rejected": -2.4414615631103516, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 8.588925065399472, |
| "learning_rate": 1.685393258426966e-08, |
| "logps/chosen": -53.107017517089844, |
| "logps/rejected": -73.8092041015625, |
| "loss": 0.3472, |
| "losses/dpo": 0.5091351866722107, |
| "losses/sft": 2.4279067516326904, |
| "losses/total": 0.5091351866722107, |
| "ref_logps/chosen": -39.874427795410156, |
| "ref_logps/rejected": -47.138092041015625, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -1.3232589960098267, |
| "rewards/margins": 1.34385085105896, |
| "rewards/rejected": -2.667109966278076, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.91, |
| "grad_norm": 9.061222152581863, |
| "learning_rate": 1.544943820224719e-08, |
| "logps/chosen": -55.36473846435547, |
| "logps/rejected": -71.19318389892578, |
| "loss": 0.3925, |
| "losses/dpo": 0.7466526627540588, |
| "losses/sft": 2.359135627746582, |
| "losses/total": 0.7466526627540588, |
| "ref_logps/chosen": -41.025726318359375, |
| "ref_logps/rejected": -44.06968688964844, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -1.4339020252227783, |
| "rewards/margins": 1.2784475088119507, |
| "rewards/rejected": -2.7123494148254395, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.91, |
| "grad_norm": 9.909969914239525, |
| "learning_rate": 1.4044943820224718e-08, |
| "logps/chosen": -51.63406753540039, |
| "logps/rejected": -77.14066314697266, |
| "loss": 0.3813, |
| "losses/dpo": 0.6047529578208923, |
| "losses/sft": 1.7509853839874268, |
| "losses/total": 0.6047529578208923, |
| "ref_logps/chosen": -37.65089416503906, |
| "ref_logps/rejected": -48.93791198730469, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -1.398316740989685, |
| "rewards/margins": 1.4219584465026855, |
| "rewards/rejected": -2.82027530670166, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.92, |
| "grad_norm": 8.501319268805696, |
| "learning_rate": 1.2640449438202247e-08, |
| "logps/chosen": -53.17372512817383, |
| "logps/rejected": -68.52401733398438, |
| "loss": 0.3433, |
| "losses/dpo": 0.2757856249809265, |
| "losses/sft": 2.1045045852661133, |
| "losses/total": 0.2757856249809265, |
| "ref_logps/chosen": -40.52703857421875, |
| "ref_logps/rejected": -42.728126525878906, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.264668345451355, |
| "rewards/margins": 1.3149209022521973, |
| "rewards/rejected": -2.579589366912842, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.93, |
| "grad_norm": 8.718869385679747, |
| "learning_rate": 1.1235955056179774e-08, |
| "logps/chosen": -54.90761184692383, |
| "logps/rejected": -70.40953826904297, |
| "loss": 0.3789, |
| "losses/dpo": 0.5766834020614624, |
| "losses/sft": 2.222163200378418, |
| "losses/total": 0.5766834020614624, |
| "ref_logps/chosen": -41.6505012512207, |
| "ref_logps/rejected": -44.63560485839844, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -1.3257105350494385, |
| "rewards/margins": 1.2516822814941406, |
| "rewards/rejected": -2.577392816543579, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.94, |
| "grad_norm": 9.426380018310086, |
| "learning_rate": 9.831460674157303e-09, |
| "logps/chosen": -54.11854934692383, |
| "logps/rejected": -71.68621826171875, |
| "loss": 0.386, |
| "losses/dpo": 0.20794588327407837, |
| "losses/sft": 2.1166539192199707, |
| "losses/total": 0.20794588327407837, |
| "ref_logps/chosen": -40.966426849365234, |
| "ref_logps/rejected": -45.38508605957031, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -1.3152116537094116, |
| "rewards/margins": 1.3149020671844482, |
| "rewards/rejected": -2.6301136016845703, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.94, |
| "grad_norm": 10.46768940793886, |
| "learning_rate": 8.42696629213483e-09, |
| "logps/chosen": -54.164424896240234, |
| "logps/rejected": -71.98051452636719, |
| "loss": 0.4893, |
| "losses/dpo": 0.39178702235221863, |
| "losses/sft": 2.2996134757995605, |
| "losses/total": 0.39178702235221863, |
| "ref_logps/chosen": -40.31678009033203, |
| "ref_logps/rejected": -46.53054428100586, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.384764313697815, |
| "rewards/margins": 1.1602333784103394, |
| "rewards/rejected": -2.5449976921081543, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.95, |
| "grad_norm": 8.941416739373434, |
| "learning_rate": 7.022471910112359e-09, |
| "logps/chosen": -53.463233947753906, |
| "logps/rejected": -71.62788391113281, |
| "loss": 0.3318, |
| "losses/dpo": 0.2409718632698059, |
| "losses/sft": 1.6294231414794922, |
| "losses/total": 0.2409718632698059, |
| "ref_logps/chosen": -41.0477294921875, |
| "ref_logps/rejected": -44.89318084716797, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -1.2415508031845093, |
| "rewards/margins": 1.4319190979003906, |
| "rewards/rejected": -2.6734697818756104, |
| "step": 391 |
| }, |
| { |
| "epoch": 2.96, |
| "grad_norm": 8.98210941116973, |
| "learning_rate": 5.617977528089887e-09, |
| "logps/chosen": -54.616600036621094, |
| "logps/rejected": -73.2689208984375, |
| "loss": 0.3832, |
| "losses/dpo": 0.43616408109664917, |
| "losses/sft": 2.2494640350341797, |
| "losses/total": 0.43616408109664917, |
| "ref_logps/chosen": -40.2120361328125, |
| "ref_logps/rejected": -46.42675018310547, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -1.4404562711715698, |
| "rewards/margins": 1.2437611818313599, |
| "rewards/rejected": -2.684217691421509, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.97, |
| "grad_norm": 9.151862468783703, |
| "learning_rate": 4.213483146067415e-09, |
| "logps/chosen": -51.817893981933594, |
| "logps/rejected": -69.46862030029297, |
| "loss": 0.3906, |
| "losses/dpo": 0.2829042077064514, |
| "losses/sft": 2.443455696105957, |
| "losses/total": 0.2829042077064514, |
| "ref_logps/chosen": -38.848846435546875, |
| "ref_logps/rejected": -43.35674285888672, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -1.2969045639038086, |
| "rewards/margins": 1.3142831325531006, |
| "rewards/rejected": -2.61118745803833, |
| "step": 393 |
| }, |
| { |
| "epoch": 2.97, |
| "grad_norm": 10.678776530633808, |
| "learning_rate": 2.8089887640449435e-09, |
| "logps/chosen": -54.69252014160156, |
| "logps/rejected": -73.899658203125, |
| "loss": 0.4591, |
| "losses/dpo": 0.4431733191013336, |
| "losses/sft": 2.1250791549682617, |
| "losses/total": 0.4431733191013336, |
| "ref_logps/chosen": -39.718658447265625, |
| "ref_logps/rejected": -48.00882339477539, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.4973857402801514, |
| "rewards/margins": 1.0916969776153564, |
| "rewards/rejected": -2.589082717895508, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.98, |
| "grad_norm": 8.192368817594446, |
| "learning_rate": 1.4044943820224717e-09, |
| "logps/chosen": -50.57025909423828, |
| "logps/rejected": -69.49359130859375, |
| "loss": 0.3625, |
| "losses/dpo": 0.550754964351654, |
| "losses/sft": 2.1057140827178955, |
| "losses/total": 0.550754964351654, |
| "ref_logps/chosen": -38.39902877807617, |
| "ref_logps/rejected": -43.90056228637695, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.2171236276626587, |
| "rewards/margins": 1.342179298400879, |
| "rewards/rejected": -2.559302806854248, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.99, |
| "grad_norm": 8.91219728509694, |
| "learning_rate": 0.0, |
| "logps/chosen": -56.36674499511719, |
| "logps/rejected": -75.95218658447266, |
| "loss": 0.3695, |
| "losses/dpo": 0.45165300369262695, |
| "losses/sft": 1.7463542222976685, |
| "losses/total": 0.45165300369262695, |
| "ref_logps/chosen": -42.22056198120117, |
| "ref_logps/rejected": -48.152099609375, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -1.4146177768707275, |
| "rewards/margins": 1.3653908967971802, |
| "rewards/rejected": -2.7800087928771973, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.99, |
| "step": 396, |
| "total_flos": 0.0, |
| "train_loss": 0.5140665640132596, |
| "train_runtime": 34070.7646, |
| "train_samples_per_second": 1.493, |
| "train_steps_per_second": 0.012 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 396, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 70, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|