| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.09136592051164916, | |
| "eval_steps": 500, | |
| "global_step": 250, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0003654636820465966, | |
| "grad_norm": 70.50516510009766, | |
| "kl": 0.0, | |
| "learning_rate": 1e-05, | |
| "logits/chosen": -66672234.666666664, | |
| "logits/rejected": -85497435.42857143, | |
| "logps/chosen": -414.2180447048611, | |
| "logps/rejected": -344.10721261160717, | |
| "loss": 0.275, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0007309273640931932, | |
| "grad_norm": 69.13626861572266, | |
| "kl": 0.0, | |
| "learning_rate": 2e-05, | |
| "logits/chosen": -66327543.46666667, | |
| "logits/rejected": -48240937.4117647, | |
| "logps/chosen": -422.21435546875, | |
| "logps/rejected": -276.88039981617646, | |
| "loss": 0.3125, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0010963910461397899, | |
| "grad_norm": 94.15017700195312, | |
| "kl": 0.10230207443237305, | |
| "learning_rate": 3e-05, | |
| "logits/chosen": -77653326.76923077, | |
| "logits/rejected": -69068126.31578948, | |
| "logps/chosen": -427.74459134615387, | |
| "logps/rejected": -336.28831722861844, | |
| "loss": 0.3289, | |
| "rewards/chosen": -0.02957458679492657, | |
| "rewards/margins": 0.010732028407123888, | |
| "rewards/rejected": -0.04030661520205046, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0014618547281863865, | |
| "grad_norm": 99.499267578125, | |
| "kl": 0.0018157958984375, | |
| "learning_rate": 4e-05, | |
| "logits/chosen": -63872645.333333336, | |
| "logits/rejected": -70730592.0, | |
| "logps/chosen": -430.9491780598958, | |
| "logps/rejected": -360.9772705078125, | |
| "loss": 0.3189, | |
| "rewards/chosen": -0.022309874494870503, | |
| "rewards/margins": 0.18317682842413582, | |
| "rewards/rejected": -0.20548670291900634, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.001827318410232983, | |
| "grad_norm": 103.61483764648438, | |
| "kl": 0.012647151947021484, | |
| "learning_rate": 5e-05, | |
| "logits/chosen": -60096945.23076923, | |
| "logits/rejected": -65103366.7368421, | |
| "logps/chosen": -292.55611478365387, | |
| "logps/rejected": -460.35911800986844, | |
| "loss": 0.2501, | |
| "rewards/chosen": -0.13560827878805307, | |
| "rewards/margins": 0.5146523955379904, | |
| "rewards/rejected": -0.6502606743260434, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0021927820922795797, | |
| "grad_norm": 64.95700073242188, | |
| "kl": 0.0, | |
| "learning_rate": 6e-05, | |
| "logits/chosen": -89392679.38461539, | |
| "logits/rejected": -54245170.526315786, | |
| "logps/chosen": -412.2142803485577, | |
| "logps/rejected": -375.92197779605266, | |
| "loss": 0.1876, | |
| "rewards/chosen": -0.5762283618633564, | |
| "rewards/margins": 0.7616908733661358, | |
| "rewards/rejected": -1.3379192352294922, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0025582457743261763, | |
| "grad_norm": 21.49436378479004, | |
| "kl": 0.0, | |
| "learning_rate": 7e-05, | |
| "logits/chosen": -72870521.26315789, | |
| "logits/rejected": -79904659.6923077, | |
| "logps/chosen": -391.34200246710526, | |
| "logps/rejected": -518.6966271033654, | |
| "loss": 0.1233, | |
| "rewards/chosen": -1.5921392942729748, | |
| "rewards/margins": 2.0520765096069833, | |
| "rewards/rejected": -3.644215803879958, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.002923709456372773, | |
| "grad_norm": 15.191149711608887, | |
| "kl": 0.0, | |
| "learning_rate": 8e-05, | |
| "logits/chosen": -80619752.0, | |
| "logits/rejected": -63836768.0, | |
| "logps/chosen": -438.9120788574219, | |
| "logps/rejected": -462.61907958984375, | |
| "loss": 0.1209, | |
| "rewards/chosen": -2.851921319961548, | |
| "rewards/margins": 2.961158037185669, | |
| "rewards/rejected": -5.813079357147217, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0032891731384193696, | |
| "grad_norm": 10.538928031921387, | |
| "kl": 0.0, | |
| "learning_rate": 9e-05, | |
| "logits/chosen": -69753120.0, | |
| "logits/rejected": -67056665.6, | |
| "logps/chosen": -415.3586832682292, | |
| "logps/rejected": -357.4203125, | |
| "loss": 0.0958, | |
| "rewards/chosen": -4.56794802347819, | |
| "rewards/margins": 3.072142155965169, | |
| "rewards/rejected": -7.6400901794433596, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.003654636820465966, | |
| "grad_norm": 3.222597122192383, | |
| "kl": 0.0, | |
| "learning_rate": 0.0001, | |
| "logits/chosen": -84851421.86666666, | |
| "logits/rejected": -61368357.64705882, | |
| "logps/chosen": -384.41090494791666, | |
| "logps/rejected": -465.8205135569853, | |
| "loss": 0.0918, | |
| "rewards/chosen": -6.086501057942709, | |
| "rewards/margins": 7.457770074582568, | |
| "rewards/rejected": -13.544271132525276, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.004020100502512563, | |
| "grad_norm": 0.7196786403656006, | |
| "kl": 0.0, | |
| "learning_rate": 9.99989723479183e-05, | |
| "logits/chosen": -81370248.0, | |
| "logits/rejected": -91569408.0, | |
| "logps/chosen": -442.4510803222656, | |
| "logps/rejected": -516.6836547851562, | |
| "loss": 0.1001, | |
| "rewards/chosen": -10.411160469055176, | |
| "rewards/margins": 11.409586906433105, | |
| "rewards/rejected": -21.82074737548828, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.004385564184559159, | |
| "grad_norm": 2.4945011138916016, | |
| "kl": 0.0, | |
| "learning_rate": 9.999588943391597e-05, | |
| "logits/chosen": -118624777.14285715, | |
| "logits/rejected": -81275242.66666667, | |
| "logps/chosen": -437.8573521205357, | |
| "logps/rejected": -597.7122938368055, | |
| "loss": 0.0845, | |
| "rewards/chosen": -10.227762494768415, | |
| "rewards/margins": 20.10963645813957, | |
| "rewards/rejected": -30.337398952907986, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0047510278666057565, | |
| "grad_norm": 0.59647136926651, | |
| "kl": 0.0, | |
| "learning_rate": 9.999075138471951e-05, | |
| "logits/chosen": -115453986.13333334, | |
| "logits/rejected": -76846275.76470588, | |
| "logps/chosen": -453.15276692708335, | |
| "logps/rejected": -557.3473690257352, | |
| "loss": 0.0934, | |
| "rewards/chosen": -13.898647054036458, | |
| "rewards/margins": 14.38333895814185, | |
| "rewards/rejected": -28.281986012178308, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.005116491548652353, | |
| "grad_norm": 4.384544849395752, | |
| "kl": 0.0, | |
| "learning_rate": 9.9983558411534e-05, | |
| "logits/chosen": -112966163.6923077, | |
| "logits/rejected": -103012412.63157895, | |
| "logps/chosen": -486.83071664663464, | |
| "logps/rejected": -487.32252261513156, | |
| "loss": 0.0863, | |
| "rewards/chosen": -14.020332923302284, | |
| "rewards/margins": 13.25188708594936, | |
| "rewards/rejected": -27.272220009251644, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.00548195523069895, | |
| "grad_norm": 0.7163640856742859, | |
| "kl": 0.0, | |
| "learning_rate": 9.99743108100344e-05, | |
| "logits/chosen": -101596401.77777778, | |
| "logits/rejected": -95033782.85714285, | |
| "logps/chosen": -531.5575629340278, | |
| "logps/rejected": -488.21561104910717, | |
| "loss": 0.1123, | |
| "rewards/chosen": -14.693433973524305, | |
| "rewards/margins": 14.402351984902035, | |
| "rewards/rejected": -29.09578595842634, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.005847418912745546, | |
| "grad_norm": 2.3758387565612793, | |
| "kl": 0.0, | |
| "learning_rate": 9.996300896035339e-05, | |
| "logits/chosen": -66563285.333333336, | |
| "logits/rejected": -116772242.28571428, | |
| "logps/chosen": -412.4872775607639, | |
| "logps/rejected": -560.7509765625, | |
| "loss": 0.1082, | |
| "rewards/chosen": -11.090181986490885, | |
| "rewards/margins": 20.835895719982332, | |
| "rewards/rejected": -31.926077706473215, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.006212882594792143, | |
| "grad_norm": 4.935825824737549, | |
| "kl": 0.0, | |
| "learning_rate": 9.994965332706573e-05, | |
| "logits/chosen": -95241365.33333333, | |
| "logits/rejected": -72146628.57142857, | |
| "logps/chosen": -357.1007486979167, | |
| "logps/rejected": -425.796630859375, | |
| "loss": 0.1061, | |
| "rewards/chosen": -10.44845920138889, | |
| "rewards/margins": 16.569448077489458, | |
| "rewards/rejected": -27.017907278878347, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.006578346276838739, | |
| "grad_norm": 2.1070468425750732, | |
| "kl": 0.0, | |
| "learning_rate": 9.993424445916923e-05, | |
| "logits/chosen": -95185226.66666667, | |
| "logits/rejected": -59045593.6, | |
| "logps/chosen": -488.2670491536458, | |
| "logps/rejected": -467.557177734375, | |
| "loss": 0.0739, | |
| "rewards/chosen": -12.333049774169922, | |
| "rewards/margins": 15.72508010864258, | |
| "rewards/rejected": -28.0581298828125, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.006943809958885336, | |
| "grad_norm": 2.696626901626587, | |
| "kl": 0.0, | |
| "learning_rate": 9.991678299006205e-05, | |
| "logits/chosen": -100491392.0, | |
| "logits/rejected": -78168736.0, | |
| "logps/chosen": -447.7462972005208, | |
| "logps/rejected": -548.93388671875, | |
| "loss": 0.0735, | |
| "rewards/chosen": -9.27017593383789, | |
| "rewards/margins": 20.79569320678711, | |
| "rewards/rejected": -30.065869140625, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.007309273640931932, | |
| "grad_norm": 7.118461608886719, | |
| "kl": 0.0, | |
| "learning_rate": 9.989726963751682e-05, | |
| "logits/chosen": -79133110.85714285, | |
| "logits/rejected": -78483392.0, | |
| "logps/chosen": -368.60341099330356, | |
| "logps/rejected": -551.7664388020834, | |
| "loss": 0.0808, | |
| "rewards/chosen": -5.415279933384487, | |
| "rewards/margins": 22.936009603833394, | |
| "rewards/rejected": -28.35128953721788, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.007674737322978529, | |
| "grad_norm": 14.0170259475708, | |
| "kl": 0.0, | |
| "learning_rate": 9.987570520365104e-05, | |
| "logits/chosen": -94006579.2, | |
| "logits/rejected": -65066563.76470588, | |
| "logps/chosen": -470.67978515625, | |
| "logps/rejected": -464.52550551470586, | |
| "loss": 0.0782, | |
| "rewards/chosen": -3.98671137491862, | |
| "rewards/margins": 18.300774981928807, | |
| "rewards/rejected": -22.287486356847428, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.008040201005025126, | |
| "grad_norm": 11.112717628479004, | |
| "kl": 0.0, | |
| "learning_rate": 9.98520905748941e-05, | |
| "logits/chosen": -58957184.0, | |
| "logits/rejected": -82073927.1111111, | |
| "logps/chosen": -377.43223353794644, | |
| "logps/rejected": -580.0238715277778, | |
| "loss": 0.0608, | |
| "rewards/chosen": -1.3486518859863281, | |
| "rewards/margins": 23.146312713623047, | |
| "rewards/rejected": -24.494964599609375, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.008405664687071723, | |
| "grad_norm": 11.628118515014648, | |
| "kl": 0.5444526672363281, | |
| "learning_rate": 9.982642672195092e-05, | |
| "logits/chosen": -73654023.52941176, | |
| "logits/rejected": -45703005.86666667, | |
| "logps/chosen": -409.02473000919116, | |
| "logps/rejected": -438.523046875, | |
| "loss": 0.0573, | |
| "rewards/chosen": -0.02788083693560432, | |
| "rewards/margins": 15.746053082335228, | |
| "rewards/rejected": -15.773933919270833, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.008771128369118319, | |
| "grad_norm": 19.863908767700195, | |
| "kl": 1.022608757019043, | |
| "learning_rate": 9.979871469976196e-05, | |
| "logits/chosen": -53002395.82608695, | |
| "logits/rejected": -62163328.0, | |
| "logps/chosen": -303.9137015964674, | |
| "logps/rejected": -471.96875, | |
| "loss": 0.0552, | |
| "rewards/chosen": 2.6770420903744907, | |
| "rewards/margins": 15.497729278417024, | |
| "rewards/rejected": -12.820687188042534, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.009136592051164915, | |
| "grad_norm": 5.07356071472168, | |
| "kl": 1.4929475784301758, | |
| "learning_rate": 9.976895564745991e-05, | |
| "logits/chosen": -52722089.14285714, | |
| "logits/rejected": -63057265.777777776, | |
| "logps/chosen": -374.541015625, | |
| "logps/rejected": -504.29741753472223, | |
| "loss": 0.0122, | |
| "rewards/chosen": 5.3456540788922995, | |
| "rewards/margins": 18.508099994962176, | |
| "rewards/rejected": -13.162445916069878, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.009502055733211513, | |
| "grad_norm": 9.457200050354004, | |
| "kl": 4.141006946563721, | |
| "learning_rate": 9.973715078832288e-05, | |
| "logits/chosen": -57916832.0, | |
| "logits/rejected": -60996544.0, | |
| "logps/chosen": -423.02412109375, | |
| "logps/rejected": -527.6825764973959, | |
| "loss": 0.0382, | |
| "rewards/chosen": 4.26049575805664, | |
| "rewards/margins": 15.99791997273763, | |
| "rewards/rejected": -11.73742421468099, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.009867519415258109, | |
| "grad_norm": 16.997724533081055, | |
| "kl": 4.360161781311035, | |
| "learning_rate": 9.970330142972401e-05, | |
| "logits/chosen": -59864072.53333333, | |
| "logits/rejected": -39135585.88235294, | |
| "logps/chosen": -592.0302083333333, | |
| "logps/rejected": -396.6316923253676, | |
| "loss": 0.0187, | |
| "rewards/chosen": 6.339927673339844, | |
| "rewards/margins": 13.3125659718233, | |
| "rewards/rejected": -6.972638298483456, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.010232983097304705, | |
| "grad_norm": 9.559985160827637, | |
| "kl": 2.8432774543762207, | |
| "learning_rate": 9.966740896307791e-05, | |
| "logits/chosen": -32762090.0, | |
| "logits/rejected": -64243928.0, | |
| "logps/chosen": -359.212158203125, | |
| "logps/rejected": -476.642822265625, | |
| "loss": 0.0543, | |
| "rewards/chosen": 5.413540363311768, | |
| "rewards/margins": 12.094419002532959, | |
| "rewards/rejected": -6.680878639221191, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.010598446779351301, | |
| "grad_norm": 9.041191101074219, | |
| "kl": 5.258052825927734, | |
| "learning_rate": 9.962947486378326e-05, | |
| "logits/chosen": -63143973.64705882, | |
| "logits/rejected": -46839790.93333333, | |
| "logps/chosen": -453.3519646139706, | |
| "logps/rejected": -312.64703776041665, | |
| "loss": 0.0284, | |
| "rewards/chosen": 6.458764917710248, | |
| "rewards/margins": 14.038505823471967, | |
| "rewards/rejected": -7.579740905761719, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.0109639104613979, | |
| "grad_norm": 19.824125289916992, | |
| "kl": 3.294300079345703, | |
| "learning_rate": 9.95895006911623e-05, | |
| "logits/chosen": -46934882.461538464, | |
| "logits/rejected": -52136461.473684214, | |
| "logps/chosen": -373.4299128605769, | |
| "logps/rejected": -472.3502261513158, | |
| "loss": 0.0807, | |
| "rewards/chosen": 5.015818082369291, | |
| "rewards/margins": 12.79021046132694, | |
| "rewards/rejected": -7.774392378957648, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.011329374143444496, | |
| "grad_norm": 6.117930889129639, | |
| "kl": 3.7903122901916504, | |
| "learning_rate": 9.954748808839674e-05, | |
| "logits/chosen": -55276661.89473684, | |
| "logits/rejected": -61740268.307692304, | |
| "logps/chosen": -380.1571751644737, | |
| "logps/rejected": -421.41811899038464, | |
| "loss": 0.0218, | |
| "rewards/chosen": 5.947867142526727, | |
| "rewards/margins": 13.426362813725646, | |
| "rewards/rejected": -7.478495671198918, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.011694837825491092, | |
| "grad_norm": 9.04257869720459, | |
| "kl": 2.820640802383423, | |
| "learning_rate": 9.95034387824601e-05, | |
| "logits/chosen": -60048967.11111111, | |
| "logits/rejected": -46811172.571428575, | |
| "logps/chosen": -370.3642849392361, | |
| "logps/rejected": -431.4790736607143, | |
| "loss": 0.03, | |
| "rewards/chosen": 5.1152538723415795, | |
| "rewards/margins": 15.065753270709324, | |
| "rewards/rejected": -9.950499398367745, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.012060301507537688, | |
| "grad_norm": 5.166853427886963, | |
| "kl": 4.204875946044922, | |
| "learning_rate": 9.945735458404681e-05, | |
| "logits/chosen": -44660590.93333333, | |
| "logits/rejected": -65401336.47058824, | |
| "logps/chosen": -398.87623697916666, | |
| "logps/rejected": -530.9944852941177, | |
| "loss": 0.0151, | |
| "rewards/chosen": 6.006900024414063, | |
| "rewards/margins": 17.966142901252297, | |
| "rewards/rejected": -11.959242876838236, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.012425765189584286, | |
| "grad_norm": 23.333698272705078, | |
| "kl": 0.9312124252319336, | |
| "learning_rate": 9.940923738749778e-05, | |
| "logits/chosen": -50497694.11764706, | |
| "logits/rejected": -26872443.733333334, | |
| "logps/chosen": -283.69157858455884, | |
| "logps/rejected": -292.580859375, | |
| "loss": 0.0596, | |
| "rewards/chosen": 4.435907251694623, | |
| "rewards/margins": 10.932175460516238, | |
| "rewards/rejected": -6.496268208821615, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.012791228871630882, | |
| "grad_norm": 5.268362522125244, | |
| "kl": 3.4059906005859375, | |
| "learning_rate": 9.935908917072252e-05, | |
| "logits/chosen": -53245845.333333336, | |
| "logits/rejected": -41666959.058823526, | |
| "logps/chosen": -421.66468098958336, | |
| "logps/rejected": -396.19045840992646, | |
| "loss": 0.0179, | |
| "rewards/chosen": 4.784971110026041, | |
| "rewards/margins": 13.946780694699754, | |
| "rewards/rejected": -9.161809584673714, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.013156692553677478, | |
| "grad_norm": 2.412320613861084, | |
| "kl": 1.53816556930542, | |
| "learning_rate": 9.930691199511775e-05, | |
| "logits/chosen": -41895466.666666664, | |
| "logits/rejected": -59891267.76470588, | |
| "logps/chosen": -382.1077473958333, | |
| "logps/rejected": -449.4469784007353, | |
| "loss": 0.005, | |
| "rewards/chosen": 6.4715830485026045, | |
| "rewards/margins": 19.50740978764553, | |
| "rewards/rejected": -13.035826739142923, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.013522156235724074, | |
| "grad_norm": 6.030672073364258, | |
| "kl": 5.197851181030273, | |
| "learning_rate": 9.925270800548285e-05, | |
| "logits/chosen": -63401881.6, | |
| "logits/rejected": -48393333.333333336, | |
| "logps/chosen": -407.56083984375, | |
| "logps/rejected": -361.7208658854167, | |
| "loss": 0.0263, | |
| "rewards/chosen": 5.010283279418945, | |
| "rewards/margins": 13.853185780843098, | |
| "rewards/rejected": -8.842902501424154, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.013887619917770672, | |
| "grad_norm": 6.735757827758789, | |
| "kl": 2.6782684326171875, | |
| "learning_rate": 9.919647942993148e-05, | |
| "logits/chosen": -55142304.0, | |
| "logits/rejected": -44863476.0, | |
| "logps/chosen": -365.00543212890625, | |
| "logps/rejected": -543.1677856445312, | |
| "loss": 0.0196, | |
| "rewards/chosen": 3.9138436317443848, | |
| "rewards/margins": 18.014302730560303, | |
| "rewards/rejected": -14.100459098815918, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.014253083599817268, | |
| "grad_norm": 5.515551567077637, | |
| "kl": 2.4172496795654297, | |
| "learning_rate": 9.91382285798002e-05, | |
| "logits/chosen": -61824143.058823526, | |
| "logits/rejected": -58962423.46666667, | |
| "logps/chosen": -365.96599264705884, | |
| "logps/rejected": -485.2033203125, | |
| "loss": 0.0134, | |
| "rewards/chosen": 4.332511004279642, | |
| "rewards/margins": 17.40482647465725, | |
| "rewards/rejected": -13.072315470377605, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.014618547281863865, | |
| "grad_norm": 8.376943588256836, | |
| "kl": 0.7859287261962891, | |
| "learning_rate": 9.907795784955327e-05, | |
| "logits/chosen": -50139801.6, | |
| "logits/rejected": -52966418.823529415, | |
| "logps/chosen": -343.3307291666667, | |
| "logps/rejected": -476.7391716452206, | |
| "loss": 0.019, | |
| "rewards/chosen": 6.3787684122721355, | |
| "rewards/margins": 18.696476147221585, | |
| "rewards/rejected": -12.31770773494945, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01498401096391046, | |
| "grad_norm": 4.217868328094482, | |
| "kl": 7.657425880432129, | |
| "learning_rate": 9.901566971668437e-05, | |
| "logits/chosen": -48509728.0, | |
| "logits/rejected": -23746154.666666668, | |
| "logps/chosen": -324.2577880859375, | |
| "logps/rejected": -378.572998046875, | |
| "loss": 0.0192, | |
| "rewards/chosen": 6.645659637451172, | |
| "rewards/margins": 16.82607256571452, | |
| "rewards/rejected": -10.180412928263346, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.015349474645957059, | |
| "grad_norm": 3.5086028575897217, | |
| "kl": 1.2947101593017578, | |
| "learning_rate": 9.895136674161465e-05, | |
| "logits/chosen": -66508174.76923077, | |
| "logits/rejected": -60780672.0, | |
| "logps/chosen": -327.66590294471155, | |
| "logps/rejected": -400.8971011513158, | |
| "loss": 0.008, | |
| "rewards/chosen": 6.866602971003606, | |
| "rewards/margins": 16.84938141981117, | |
| "rewards/rejected": -9.982778448807565, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.015714938328003653, | |
| "grad_norm": 11.383273124694824, | |
| "kl": 4.8486528396606445, | |
| "learning_rate": 9.888505156758759e-05, | |
| "logits/chosen": -63846784.0, | |
| "logits/rejected": -55246641.777777776, | |
| "logps/chosen": -361.37552315848217, | |
| "logps/rejected": -442.8365885416667, | |
| "loss": 0.0595, | |
| "rewards/chosen": 6.539271218436105, | |
| "rewards/margins": 15.308385394868395, | |
| "rewards/rejected": -8.769114176432291, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.016080402010050253, | |
| "grad_norm": 4.625300407409668, | |
| "kl": 5.606626987457275, | |
| "learning_rate": 9.881672692056021e-05, | |
| "logits/chosen": -45201365.333333336, | |
| "logits/rejected": -40087081.14285714, | |
| "logps/chosen": -364.97667100694446, | |
| "logps/rejected": -253.65269252232142, | |
| "loss": 0.0131, | |
| "rewards/chosen": 6.814608679877387, | |
| "rewards/margins": 14.540797339545357, | |
| "rewards/rejected": -7.726188659667969, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.01644586569209685, | |
| "grad_norm": 1.9459388256072998, | |
| "kl": 1.063331127166748, | |
| "learning_rate": 9.874639560909117e-05, | |
| "logits/chosen": -49554261.333333336, | |
| "logits/rejected": -39169133.71428572, | |
| "logps/chosen": -323.4977213541667, | |
| "logps/rejected": -365.045654296875, | |
| "loss": 0.0082, | |
| "rewards/chosen": 5.9193136427137585, | |
| "rewards/margins": 15.729785313681951, | |
| "rewards/rejected": -9.810471670968193, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.016811329374143445, | |
| "grad_norm": 10.669745445251465, | |
| "kl": 11.293217658996582, | |
| "learning_rate": 9.867406052422524e-05, | |
| "logits/chosen": -59732997.81818182, | |
| "logits/rejected": -45335836.8, | |
| "logps/chosen": -402.67214133522725, | |
| "logps/rejected": -392.5699951171875, | |
| "loss": 0.0387, | |
| "rewards/chosen": 6.792078885165128, | |
| "rewards/margins": 18.25916186246005, | |
| "rewards/rejected": -11.467082977294922, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.01717679305619004, | |
| "grad_norm": 4.895358562469482, | |
| "kl": 5.924516201019287, | |
| "learning_rate": 9.859972463937441e-05, | |
| "logits/chosen": -47930160.0, | |
| "logits/rejected": -44561610.666666664, | |
| "logps/chosen": -343.3493896484375, | |
| "logps/rejected": -452.20849609375, | |
| "loss": 0.0168, | |
| "rewards/chosen": 6.354761123657227, | |
| "rewards/margins": 16.014861424764, | |
| "rewards/rejected": -9.660100301106771, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.017542256738236638, | |
| "grad_norm": 7.401646614074707, | |
| "kl": 3.7730391025543213, | |
| "learning_rate": 9.852339101019574e-05, | |
| "logits/chosen": -30919124.57142857, | |
| "logits/rejected": -39209255.11111111, | |
| "logps/chosen": -269.38614327566967, | |
| "logps/rejected": -556.4567599826389, | |
| "loss": 0.0333, | |
| "rewards/chosen": 4.891801016671317, | |
| "rewards/margins": 20.362538050091455, | |
| "rewards/rejected": -15.47073703342014, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.017907720420283234, | |
| "grad_norm": 23.92951774597168, | |
| "kl": 1.085057258605957, | |
| "learning_rate": 9.844506277446577e-05, | |
| "logits/chosen": -30682542.0, | |
| "logits/rejected": -46926880.0, | |
| "logps/chosen": -253.35186767578125, | |
| "logps/rejected": -473.3765462239583, | |
| "loss": 0.0386, | |
| "rewards/chosen": 6.101473808288574, | |
| "rewards/margins": 18.379438082377114, | |
| "rewards/rejected": -12.277964274088541, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.01827318410232983, | |
| "grad_norm": 8.554891586303711, | |
| "kl": 1.4251766204833984, | |
| "learning_rate": 9.836474315195147e-05, | |
| "logits/chosen": -52996062.11764706, | |
| "logits/rejected": -48182971.733333334, | |
| "logps/chosen": -335.1333582261029, | |
| "logps/rejected": -413.86982421875, | |
| "loss": 0.0302, | |
| "rewards/chosen": 4.55959140553194, | |
| "rewards/margins": 12.97196221445121, | |
| "rewards/rejected": -8.41237080891927, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.018638647784376426, | |
| "grad_norm": 4.158783435821533, | |
| "kl": 4.646789073944092, | |
| "learning_rate": 9.828243544427796e-05, | |
| "logits/chosen": -48258592.0, | |
| "logits/rejected": -58305577.14285714, | |
| "logps/chosen": -288.0983072916667, | |
| "logps/rejected": -463.22607421875, | |
| "loss": 0.0122, | |
| "rewards/chosen": 6.595457712809245, | |
| "rewards/margins": 19.841273534865607, | |
| "rewards/rejected": -13.245815822056361, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.019004111466423026, | |
| "grad_norm": 2.682189464569092, | |
| "kl": 2.4769458770751953, | |
| "learning_rate": 9.819814303479267e-05, | |
| "logits/chosen": -69832394.66666667, | |
| "logits/rejected": -49748073.6, | |
| "logps/chosen": -438.50634765625, | |
| "logps/rejected": -484.369775390625, | |
| "loss": 0.0328, | |
| "rewards/chosen": 9.220114390055338, | |
| "rewards/margins": 21.066194407145183, | |
| "rewards/rejected": -11.846080017089843, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.019369575148469622, | |
| "grad_norm": 7.231258392333984, | |
| "kl": 5.255629062652588, | |
| "learning_rate": 9.811186938842645e-05, | |
| "logits/chosen": -71031184.0, | |
| "logits/rejected": -56307680.0, | |
| "logps/chosen": -393.62603759765625, | |
| "logps/rejected": -514.8372802734375, | |
| "loss": 0.0119, | |
| "rewards/chosen": 9.535325050354004, | |
| "rewards/margins": 22.810078620910645, | |
| "rewards/rejected": -13.27475357055664, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.019735038830516218, | |
| "grad_norm": 7.897918224334717, | |
| "kl": 4.446504592895508, | |
| "learning_rate": 9.802361805155097e-05, | |
| "logits/chosen": -51625570.461538464, | |
| "logits/rejected": -45185003.78947368, | |
| "logps/chosen": -379.41346153846155, | |
| "logps/rejected": -640.4477796052631, | |
| "loss": 0.0175, | |
| "rewards/chosen": 5.7126593956580525, | |
| "rewards/margins": 26.850794803758383, | |
| "rewards/rejected": -21.13813540810033, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.020100502512562814, | |
| "grad_norm": 4.542209148406982, | |
| "kl": 2.1538782119750977, | |
| "learning_rate": 9.793339265183303e-05, | |
| "logits/chosen": -40115584.0, | |
| "logits/rejected": -59064515.55555555, | |
| "logps/chosen": -353.450927734375, | |
| "logps/rejected": -619.6857096354166, | |
| "loss": 0.0162, | |
| "rewards/chosen": 5.434125082833426, | |
| "rewards/margins": 19.83607888600183, | |
| "rewards/rejected": -14.401953803168404, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.02046596619460941, | |
| "grad_norm": 7.430333614349365, | |
| "kl": 0.5837211608886719, | |
| "learning_rate": 9.784119689808544e-05, | |
| "logits/chosen": -28610328.0, | |
| "logits/rejected": -37285267.2, | |
| "logps/chosen": -281.4744059244792, | |
| "logps/rejected": -427.88486328125, | |
| "loss": 0.0317, | |
| "rewards/chosen": 6.235033671061198, | |
| "rewards/margins": 19.226700846354166, | |
| "rewards/rejected": -12.991667175292969, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.020831429876656007, | |
| "grad_norm": 14.218281745910645, | |
| "kl": 2.6655921936035156, | |
| "learning_rate": 9.774703458011453e-05, | |
| "logits/chosen": -35870300.44444445, | |
| "logits/rejected": -51763912.347826086, | |
| "logps/chosen": -474.1842990451389, | |
| "logps/rejected": -464.2010020380435, | |
| "loss": 0.0335, | |
| "rewards/chosen": 5.657533009847005, | |
| "rewards/margins": 16.456544240315754, | |
| "rewards/rejected": -10.79901123046875, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.021196893558702603, | |
| "grad_norm": 6.1396379470825195, | |
| "kl": 2.680887222290039, | |
| "learning_rate": 9.765090956856436e-05, | |
| "logits/chosen": -39752972.8, | |
| "logits/rejected": -50118452.705882356, | |
| "logps/chosen": -337.64710286458336, | |
| "logps/rejected": -483.1478056066176, | |
| "loss": 0.0194, | |
| "rewards/chosen": 5.75444590250651, | |
| "rewards/margins": 21.916373967189415, | |
| "rewards/rejected": -16.161928064682904, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.0215623572407492, | |
| "grad_norm": 11.815670013427734, | |
| "kl": 5.052088737487793, | |
| "learning_rate": 9.755282581475769e-05, | |
| "logits/chosen": -55888608.0, | |
| "logits/rejected": -54900144.0, | |
| "logps/chosen": -439.7640686035156, | |
| "logps/rejected": -312.67437744140625, | |
| "loss": 0.0247, | |
| "rewards/chosen": 8.315971374511719, | |
| "rewards/margins": 16.8193359375, | |
| "rewards/rejected": -8.503364562988281, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.0219278209227958, | |
| "grad_norm": 3.399034023284912, | |
| "kl": 0.0, | |
| "learning_rate": 9.745278735053343e-05, | |
| "logits/chosen": -30328192.0, | |
| "logits/rejected": -38406880.0, | |
| "logps/chosen": -300.8030192057292, | |
| "logps/rejected": -372.9355224609375, | |
| "loss": 0.0038, | |
| "rewards/chosen": 6.941567103068034, | |
| "rewards/margins": 20.73451296488444, | |
| "rewards/rejected": -13.792945861816406, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.022293284604842395, | |
| "grad_norm": 14.877612113952637, | |
| "kl": 3.2787771224975586, | |
| "learning_rate": 9.735079828808107e-05, | |
| "logits/chosen": -54578709.333333336, | |
| "logits/rejected": -57578782.11764706, | |
| "logps/chosen": -316.0634765625, | |
| "logps/rejected": -518.0548023897059, | |
| "loss": 0.0239, | |
| "rewards/chosen": 6.856675720214843, | |
| "rewards/margins": 25.521863780302162, | |
| "rewards/rejected": -18.665188060087317, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.02265874828688899, | |
| "grad_norm": 3.7056872844696045, | |
| "kl": 2.769613742828369, | |
| "learning_rate": 9.724686281977146e-05, | |
| "logits/chosen": -54293920.0, | |
| "logits/rejected": -57358412.0, | |
| "logps/chosen": -362.36322021484375, | |
| "logps/rejected": -437.716796875, | |
| "loss": 0.0118, | |
| "rewards/chosen": 7.557523727416992, | |
| "rewards/margins": 22.848549842834473, | |
| "rewards/rejected": -15.29102611541748, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.023024211968935587, | |
| "grad_norm": 7.155191898345947, | |
| "kl": 0.9947853088378906, | |
| "learning_rate": 9.714098521798465e-05, | |
| "logits/chosen": -56705880.615384616, | |
| "logits/rejected": -46115129.2631579, | |
| "logps/chosen": -440.0930363581731, | |
| "logps/rejected": -505.4751233552632, | |
| "loss": 0.0107, | |
| "rewards/chosen": 6.279456505408654, | |
| "rewards/margins": 22.39212579765783, | |
| "rewards/rejected": -16.112669292249176, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.023389675650982183, | |
| "grad_norm": 5.921576976776123, | |
| "kl": 2.5714855194091797, | |
| "learning_rate": 9.703316983493414e-05, | |
| "logits/chosen": -41153717.89473684, | |
| "logits/rejected": -66649604.92307692, | |
| "logps/chosen": -408.80581825657896, | |
| "logps/rejected": -532.0378981370193, | |
| "loss": 0.0181, | |
| "rewards/chosen": 5.973050970780222, | |
| "rewards/margins": 27.444178994367963, | |
| "rewards/rejected": -21.47112802358774, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.02375513933302878, | |
| "grad_norm": 6.441226959228516, | |
| "kl": 4.569951057434082, | |
| "learning_rate": 9.692342110248802e-05, | |
| "logits/chosen": -48828369.777777776, | |
| "logits/rejected": -37597776.0, | |
| "logps/chosen": -341.35687934027777, | |
| "logps/rejected": -341.09946986607144, | |
| "loss": 0.0329, | |
| "rewards/chosen": 6.38562986585829, | |
| "rewards/margins": 21.198414333282955, | |
| "rewards/rejected": -14.812784467424665, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.024120603015075376, | |
| "grad_norm": 6.141626834869385, | |
| "kl": 0.8777332305908203, | |
| "learning_rate": 9.681174353198687e-05, | |
| "logits/chosen": -27000544.0, | |
| "logits/rejected": -40192785.06666667, | |
| "logps/chosen": -369.6226447610294, | |
| "logps/rejected": -503.09759114583335, | |
| "loss": 0.0145, | |
| "rewards/chosen": 6.301104826085708, | |
| "rewards/margins": 21.401970508051853, | |
| "rewards/rejected": -15.100865681966146, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.024486066697121972, | |
| "grad_norm": 6.145155906677246, | |
| "kl": 3.8379316329956055, | |
| "learning_rate": 9.669814171405816e-05, | |
| "logits/chosen": -32335815.111111112, | |
| "logits/rejected": -38183881.14285714, | |
| "logps/chosen": -297.35590277777777, | |
| "logps/rejected": -354.58872767857144, | |
| "loss": 0.0263, | |
| "rewards/chosen": 6.240455203586155, | |
| "rewards/margins": 17.790007031153117, | |
| "rewards/rejected": -11.549551827566964, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.02485153037916857, | |
| "grad_norm": 5.1387481689453125, | |
| "kl": 7.461835861206055, | |
| "learning_rate": 9.65826203184277e-05, | |
| "logits/chosen": -58576753.777777776, | |
| "logits/rejected": -46430317.71428572, | |
| "logps/chosen": -414.0234375, | |
| "logps/rejected": -470.3946010044643, | |
| "loss": 0.0163, | |
| "rewards/chosen": 8.588349236382378, | |
| "rewards/margins": 23.756410629030256, | |
| "rewards/rejected": -15.16806139264788, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.025216994061215168, | |
| "grad_norm": 4.586066722869873, | |
| "kl": 7.936642646789551, | |
| "learning_rate": 9.64651840937276e-05, | |
| "logits/chosen": -48964741.81818182, | |
| "logits/rejected": -54479142.4, | |
| "logps/chosen": -356.181884765625, | |
| "logps/rejected": -499.202001953125, | |
| "loss": 0.0281, | |
| "rewards/chosen": 6.949990706010298, | |
| "rewards/margins": 22.57191758589311, | |
| "rewards/rejected": -15.621926879882812, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.025582457743261764, | |
| "grad_norm": 5.896798610687256, | |
| "kl": 4.361974716186523, | |
| "learning_rate": 9.63458378673011e-05, | |
| "logits/chosen": -45619443.2, | |
| "logits/rejected": -23227202.666666668, | |
| "logps/chosen": -276.57431640625, | |
| "logps/rejected": -394.6361897786458, | |
| "loss": 0.0337, | |
| "rewards/chosen": 6.1711772918701175, | |
| "rewards/margins": 16.92397346496582, | |
| "rewards/rejected": -10.752796173095703, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.02594792142530836, | |
| "grad_norm": 8.044129371643066, | |
| "kl": 4.886088848114014, | |
| "learning_rate": 9.622458654500409e-05, | |
| "logits/chosen": -32569085.53846154, | |
| "logits/rejected": -41087205.05263158, | |
| "logps/chosen": -358.5640399639423, | |
| "logps/rejected": -417.02626439144734, | |
| "loss": 0.0159, | |
| "rewards/chosen": 8.606779245229868, | |
| "rewards/margins": 18.563793630252484, | |
| "rewards/rejected": -9.957014385022616, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.026313385107354956, | |
| "grad_norm": 3.8766796588897705, | |
| "kl": 4.4892730712890625, | |
| "learning_rate": 9.610143511100354e-05, | |
| "logits/chosen": -34946744.0, | |
| "logits/rejected": -40909920.0, | |
| "logps/chosen": -417.0111389160156, | |
| "logps/rejected": -540.6241455078125, | |
| "loss": 0.0048, | |
| "rewards/chosen": 9.499505043029785, | |
| "rewards/margins": 22.59523105621338, | |
| "rewards/rejected": -13.095726013183594, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.026678848789401553, | |
| "grad_norm": 3.4914629459381104, | |
| "kl": 7.008626937866211, | |
| "learning_rate": 9.597638862757255e-05, | |
| "logits/chosen": -37020928.0, | |
| "logits/rejected": -37666613.333333336, | |
| "logps/chosen": -377.010009765625, | |
| "logps/rejected": -358.0433756510417, | |
| "loss": 0.0147, | |
| "rewards/chosen": 7.385871887207031, | |
| "rewards/margins": 15.435877482096354, | |
| "rewards/rejected": -8.050005594889322, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.02704431247144815, | |
| "grad_norm": 21.232009887695312, | |
| "kl": 7.479496955871582, | |
| "learning_rate": 9.584945223488227e-05, | |
| "logits/chosen": -23484949.333333332, | |
| "logits/rejected": -35032530.28571428, | |
| "logps/chosen": -320.8006184895833, | |
| "logps/rejected": -401.1339634486607, | |
| "loss": 0.0546, | |
| "rewards/chosen": 7.029023912217882, | |
| "rewards/margins": 15.557043953547403, | |
| "rewards/rejected": -8.52802004132952, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.027409776153494745, | |
| "grad_norm": 1.4355311393737793, | |
| "kl": 11.160795211791992, | |
| "learning_rate": 9.572063115079063e-05, | |
| "logits/chosen": -26418411.42857143, | |
| "logits/rejected": -44502410.666666664, | |
| "logps/chosen": -326.80458286830356, | |
| "logps/rejected": -410.8125813802083, | |
| "loss": 0.0139, | |
| "rewards/chosen": 9.285047258649554, | |
| "rewards/margins": 18.055134606739834, | |
| "rewards/rejected": -8.770087348090279, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.027775239835541345, | |
| "grad_norm": 6.367202281951904, | |
| "kl": 4.941760063171387, | |
| "learning_rate": 9.558993067062785e-05, | |
| "logits/chosen": -30686507.42857143, | |
| "logits/rejected": -33273493.333333332, | |
| "logps/chosen": -331.98423549107144, | |
| "logps/rejected": -399.31407335069446, | |
| "loss": 0.0184, | |
| "rewards/chosen": 5.848648616245815, | |
| "rewards/margins": 14.488197023906405, | |
| "rewards/rejected": -8.639548407660591, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.02814070351758794, | |
| "grad_norm": 7.1556782722473145, | |
| "kl": 9.753096580505371, | |
| "learning_rate": 9.545735616697875e-05, | |
| "logits/chosen": -26850902.0, | |
| "logits/rejected": -39894508.0, | |
| "logps/chosen": -363.8820495605469, | |
| "logps/rejected": -415.097412109375, | |
| "loss": 0.0148, | |
| "rewards/chosen": 8.675457954406738, | |
| "rewards/margins": 19.9981746673584, | |
| "rewards/rejected": -11.32271671295166, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.028506167199634537, | |
| "grad_norm": 6.342607498168945, | |
| "kl": 9.045159339904785, | |
| "learning_rate": 9.53229130894619e-05, | |
| "logits/chosen": -34480309.333333336, | |
| "logits/rejected": -41044848.0, | |
| "logps/chosen": -389.5137125651042, | |
| "logps/rejected": -607.550146484375, | |
| "loss": 0.0221, | |
| "rewards/chosen": 9.95743497212728, | |
| "rewards/margins": 24.688401158650716, | |
| "rewards/rejected": -14.730966186523437, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.028871630881681133, | |
| "grad_norm": 1.0725568532943726, | |
| "kl": 5.477190971374512, | |
| "learning_rate": 9.518660696450568e-05, | |
| "logits/chosen": -44996597.333333336, | |
| "logits/rejected": -23080464.0, | |
| "logps/chosen": -446.3680013020833, | |
| "logps/rejected": -412.258740234375, | |
| "loss": 0.0077, | |
| "rewards/chosen": 9.444354375203451, | |
| "rewards/margins": 22.551628239949544, | |
| "rewards/rejected": -13.107273864746094, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.02923709456372773, | |
| "grad_norm": 3.533200740814209, | |
| "kl": 4.987822532653809, | |
| "learning_rate": 9.504844339512095e-05, | |
| "logits/chosen": -27770840.0, | |
| "logits/rejected": -28882556.8, | |
| "logps/chosen": -341.8219807942708, | |
| "logps/rejected": -445.261083984375, | |
| "loss": 0.0093, | |
| "rewards/chosen": 6.604569753011067, | |
| "rewards/margins": 17.269882710774738, | |
| "rewards/rejected": -10.665312957763671, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.029602558245774326, | |
| "grad_norm": 1.3078417778015137, | |
| "kl": 6.236767768859863, | |
| "learning_rate": 9.490842806067095e-05, | |
| "logits/chosen": -31380043.29411765, | |
| "logits/rejected": -46493320.53333333, | |
| "logps/chosen": -327.4697840073529, | |
| "logps/rejected": -503.6666666666667, | |
| "loss": 0.0113, | |
| "rewards/chosen": 8.652278226964613, | |
| "rewards/margins": 24.477358589920343, | |
| "rewards/rejected": -15.82508036295573, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.02996802192782092, | |
| "grad_norm": 0.9292804002761841, | |
| "kl": 7.139398574829102, | |
| "learning_rate": 9.476656671663765e-05, | |
| "logits/chosen": -40411835.733333334, | |
| "logits/rejected": -37733078.5882353, | |
| "logps/chosen": -431.92571614583335, | |
| "logps/rejected": -490.7525850183824, | |
| "loss": 0.0129, | |
| "rewards/chosen": 8.565556844075521, | |
| "rewards/margins": 23.004124061734068, | |
| "rewards/rejected": -14.438567217658548, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.030333485609867518, | |
| "grad_norm": 7.188210487365723, | |
| "kl": 7.373342037200928, | |
| "learning_rate": 9.46228651943853e-05, | |
| "logits/chosen": -54043712.0, | |
| "logits/rejected": -37557165.333333336, | |
| "logps/chosen": -322.804541015625, | |
| "logps/rejected": -460.3668212890625, | |
| "loss": 0.0175, | |
| "rewards/chosen": 7.64395751953125, | |
| "rewards/margins": 21.538275146484374, | |
| "rewards/rejected": -13.894317626953125, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.030698949291914118, | |
| "grad_norm": 5.98512601852417, | |
| "kl": 2.127194404602051, | |
| "learning_rate": 9.44773294009206e-05, | |
| "logits/chosen": -39313645.71428572, | |
| "logits/rejected": -43236412.44444445, | |
| "logps/chosen": -376.50913783482144, | |
| "logps/rejected": -564.1276584201389, | |
| "loss": 0.0081, | |
| "rewards/chosen": 8.587449210030693, | |
| "rewards/margins": 26.700571090456037, | |
| "rewards/rejected": -18.113121880425346, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.031064412973960714, | |
| "grad_norm": 8.364197731018066, | |
| "kl": 3.6299943923950195, | |
| "learning_rate": 9.432996531865002e-05, | |
| "logits/chosen": -49738910.11764706, | |
| "logits/rejected": -62371524.266666666, | |
| "logps/chosen": -301.84007352941177, | |
| "logps/rejected": -474.6634765625, | |
| "loss": 0.0109, | |
| "rewards/chosen": 6.6499158073874085, | |
| "rewards/margins": 21.304260493259804, | |
| "rewards/rejected": -14.654344685872395, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.031429876656007306, | |
| "grad_norm": 11.131648063659668, | |
| "kl": 0.9635066986083984, | |
| "learning_rate": 9.418077900513377e-05, | |
| "logits/chosen": -29042400.0, | |
| "logits/rejected": -51443334.4, | |
| "logps/chosen": -283.01108805338544, | |
| "logps/rejected": -413.430224609375, | |
| "loss": 0.0095, | |
| "rewards/chosen": 7.071775436401367, | |
| "rewards/margins": 23.17550926208496, | |
| "rewards/rejected": -16.103733825683594, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.0317953403380539, | |
| "grad_norm": 2.274010419845581, | |
| "kl": 1.5090999603271484, | |
| "learning_rate": 9.40297765928369e-05, | |
| "logits/chosen": -29211932.0, | |
| "logits/rejected": -32424992.0, | |
| "logps/chosen": -267.6764221191406, | |
| "logps/rejected": -351.9060363769531, | |
| "loss": 0.0177, | |
| "rewards/chosen": 7.210115909576416, | |
| "rewards/margins": 23.191922664642334, | |
| "rewards/rejected": -15.981806755065918, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.032160804020100506, | |
| "grad_norm": 3.8534750938415527, | |
| "kl": 1.0469717979431152, | |
| "learning_rate": 9.387696428887716e-05, | |
| "logits/chosen": -40250990.93333333, | |
| "logits/rejected": -26059026.82352941, | |
| "logps/chosen": -348.96025390625, | |
| "logps/rejected": -401.61853745404414, | |
| "loss": 0.0076, | |
| "rewards/chosen": 5.922635396321614, | |
| "rewards/margins": 23.717219782810584, | |
| "rewards/rejected": -17.79458438648897, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.0325262677021471, | |
| "grad_norm": 4.639629364013672, | |
| "kl": 0.889378547668457, | |
| "learning_rate": 9.372234837476978e-05, | |
| "logits/chosen": -47629276.0, | |
| "logits/rejected": -46884048.0, | |
| "logps/chosen": -348.05352783203125, | |
| "logps/rejected": -561.5498046875, | |
| "loss": 0.0252, | |
| "rewards/chosen": 5.22454309463501, | |
| "rewards/margins": 23.24092721939087, | |
| "rewards/rejected": -18.01638412475586, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.0328917313841937, | |
| "grad_norm": 3.8778488636016846, | |
| "kl": 4.593163013458252, | |
| "learning_rate": 9.356593520616948e-05, | |
| "logits/chosen": -28529976.470588237, | |
| "logits/rejected": -23993877.333333332, | |
| "logps/chosen": -320.5784696691176, | |
| "logps/rejected": -275.25576171875, | |
| "loss": 0.0122, | |
| "rewards/chosen": 6.165865729836857, | |
| "rewards/margins": 18.66365218817019, | |
| "rewards/rejected": -12.497786458333334, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.033257195066240294, | |
| "grad_norm": 3.869450569152832, | |
| "kl": 4.628847599029541, | |
| "learning_rate": 9.340773121260893e-05, | |
| "logits/chosen": -36773368.47058824, | |
| "logits/rejected": -39695684.266666666, | |
| "logps/chosen": -321.6018497242647, | |
| "logps/rejected": -385.8181966145833, | |
| "loss": 0.013, | |
| "rewards/chosen": 7.261572893928079, | |
| "rewards/margins": 20.196593849331727, | |
| "rewards/rejected": -12.935020955403646, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.03362265874828689, | |
| "grad_norm": 6.570498466491699, | |
| "kl": 7.923100471496582, | |
| "learning_rate": 9.324774289723468e-05, | |
| "logits/chosen": -41189649.777777776, | |
| "logits/rejected": -40854500.571428575, | |
| "logps/chosen": -414.1661783854167, | |
| "logps/rejected": -521.8468889508929, | |
| "loss": 0.0179, | |
| "rewards/chosen": 6.903472052680121, | |
| "rewards/margins": 24.003510853600883, | |
| "rewards/rejected": -17.10003880092076, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.03398812243033349, | |
| "grad_norm": 3.6953377723693848, | |
| "kl": 4.583061218261719, | |
| "learning_rate": 9.308597683653975e-05, | |
| "logits/chosen": -31183828.210526317, | |
| "logits/rejected": -33290806.153846152, | |
| "logps/chosen": -387.0103053042763, | |
| "logps/rejected": -422.2001953125, | |
| "loss": 0.0048, | |
| "rewards/chosen": 8.692630165501646, | |
| "rewards/margins": 24.99088902029431, | |
| "rewards/rejected": -16.298258854792667, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.03435358611238008, | |
| "grad_norm": 4.497894763946533, | |
| "kl": 4.432340621948242, | |
| "learning_rate": 9.292243968009331e-05, | |
| "logits/chosen": -26371536.0, | |
| "logits/rejected": -21584090.666666668, | |
| "logps/chosen": -315.2775634765625, | |
| "logps/rejected": -503.4237874348958, | |
| "loss": 0.0087, | |
| "rewards/chosen": 6.665242767333984, | |
| "rewards/margins": 28.2120974222819, | |
| "rewards/rejected": -21.546854654947918, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.03471904979442668, | |
| "grad_norm": 3.493032932281494, | |
| "kl": 3.130887985229492, | |
| "learning_rate": 9.275713815026731e-05, | |
| "logits/chosen": -26396160.0, | |
| "logits/rejected": -43744042.666666664, | |
| "logps/chosen": -410.61624581473217, | |
| "logps/rejected": -389.35004340277777, | |
| "loss": 0.0095, | |
| "rewards/chosen": 8.23011234828404, | |
| "rewards/margins": 21.48780023484003, | |
| "rewards/rejected": -13.25768788655599, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.035084513476473275, | |
| "grad_norm": 4.609635829925537, | |
| "kl": 1.9781148433685303, | |
| "learning_rate": 9.259007904196023e-05, | |
| "logits/chosen": -30621874.0, | |
| "logits/rejected": -24159632.0, | |
| "logps/chosen": -312.6817321777344, | |
| "logps/rejected": -411.6536560058594, | |
| "loss": 0.0057, | |
| "rewards/chosen": 6.837421417236328, | |
| "rewards/margins": 19.352378845214844, | |
| "rewards/rejected": -12.514957427978516, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.03544997715851987, | |
| "grad_norm": 3.911794424057007, | |
| "kl": 1.7758150100708008, | |
| "learning_rate": 9.242126922231763e-05, | |
| "logits/chosen": -25005936.94117647, | |
| "logits/rejected": -26132309.333333332, | |
| "logps/chosen": -304.5228917738971, | |
| "logps/rejected": -515.6973307291667, | |
| "loss": 0.008, | |
| "rewards/chosen": 7.817889942842371, | |
| "rewards/margins": 20.89239352357154, | |
| "rewards/rejected": -13.074503580729166, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.03581544084056647, | |
| "grad_norm": 10.93310260772705, | |
| "kl": 0.0, | |
| "learning_rate": 9.225071563045007e-05, | |
| "logits/chosen": -32446668.0, | |
| "logits/rejected": -31420088.0, | |
| "logps/chosen": -385.95794677734375, | |
| "logps/rejected": -462.2115885416667, | |
| "loss": 0.0179, | |
| "rewards/chosen": 8.103047370910645, | |
| "rewards/margins": 21.233390490214028, | |
| "rewards/rejected": -13.130343119303385, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.036180904522613064, | |
| "grad_norm": 2.214240074157715, | |
| "kl": 1.1216678619384766, | |
| "learning_rate": 9.207842527714767e-05, | |
| "logits/chosen": -42358813.538461536, | |
| "logits/rejected": -37142268.631578945, | |
| "logps/chosen": -405.7210036057692, | |
| "logps/rejected": -493.3158408717105, | |
| "loss": 0.0065, | |
| "rewards/chosen": 8.069786071777344, | |
| "rewards/margins": 22.240832278603, | |
| "rewards/rejected": -14.171046206825658, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.03654636820465966, | |
| "grad_norm": 4.253543376922607, | |
| "kl": 6.843319892883301, | |
| "learning_rate": 9.190440524459203e-05, | |
| "logits/chosen": -26094864.94117647, | |
| "logits/rejected": -32877809.066666666, | |
| "logps/chosen": -304.0940372242647, | |
| "logps/rejected": -444.9977213541667, | |
| "loss": 0.0191, | |
| "rewards/chosen": 8.618506936465993, | |
| "rewards/margins": 20.40099900189568, | |
| "rewards/rejected": -11.782492065429688, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.036911831886706256, | |
| "grad_norm": 4.099592685699463, | |
| "kl": 2.627181053161621, | |
| "learning_rate": 9.172866268606513e-05, | |
| "logits/chosen": -38153634.90909091, | |
| "logits/rejected": -24039457.523809522, | |
| "logps/chosen": -337.33096590909093, | |
| "logps/rejected": -446.8031063988095, | |
| "loss": 0.0187, | |
| "rewards/chosen": 5.4638051119717685, | |
| "rewards/margins": 18.64679432328129, | |
| "rewards/rejected": -13.182989211309524, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.03727729556875285, | |
| "grad_norm": 3.477898597717285, | |
| "kl": 5.307547569274902, | |
| "learning_rate": 9.155120482565521e-05, | |
| "logits/chosen": -37803064.88888889, | |
| "logits/rejected": -30550571.42857143, | |
| "logps/chosen": -354.0104709201389, | |
| "logps/rejected": -488.79739815848217, | |
| "loss": 0.0087, | |
| "rewards/chosen": 8.774648878309462, | |
| "rewards/margins": 22.556626940530442, | |
| "rewards/rejected": -13.781978062220983, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.03764275925079945, | |
| "grad_norm": 8.65983772277832, | |
| "kl": 8.958831787109375, | |
| "learning_rate": 9.137203895795983e-05, | |
| "logits/chosen": -35876150.85714286, | |
| "logits/rejected": -30704293.818181816, | |
| "logps/chosen": -336.96210007440476, | |
| "logps/rejected": -496.71315696022725, | |
| "loss": 0.0223, | |
| "rewards/chosen": 7.153175717308407, | |
| "rewards/margins": 19.01286976471608, | |
| "rewards/rejected": -11.85969404740767, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.03800822293284605, | |
| "grad_norm": 11.845693588256836, | |
| "kl": 2.2604990005493164, | |
| "learning_rate": 9.119117244778607e-05, | |
| "logits/chosen": -33321863.384615384, | |
| "logits/rejected": -40389894.7368421, | |
| "logps/chosen": -338.03354116586536, | |
| "logps/rejected": -462.0747327302632, | |
| "loss": 0.0115, | |
| "rewards/chosen": 9.559725247896635, | |
| "rewards/margins": 23.082558821087424, | |
| "rewards/rejected": -13.52283357319079, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.03837368661489265, | |
| "grad_norm": 3.929539680480957, | |
| "kl": 9.226816177368164, | |
| "learning_rate": 9.10086127298478e-05, | |
| "logits/chosen": -28004862.11764706, | |
| "logits/rejected": -34591616.0, | |
| "logps/chosen": -368.80325137867646, | |
| "logps/rejected": -428.3048828125, | |
| "loss": 0.0157, | |
| "rewards/chosen": 8.625091552734375, | |
| "rewards/margins": 22.166370646158853, | |
| "rewards/rejected": -13.54127909342448, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.038739150296939244, | |
| "grad_norm": 7.086399555206299, | |
| "kl": 5.677105903625488, | |
| "learning_rate": 9.082436730845993e-05, | |
| "logits/chosen": -37228549.333333336, | |
| "logits/rejected": -31119241.6, | |
| "logps/chosen": -417.1453857421875, | |
| "logps/rejected": -465.59287109375, | |
| "loss": 0.0289, | |
| "rewards/chosen": 6.946853001912435, | |
| "rewards/margins": 20.81446622212728, | |
| "rewards/rejected": -13.867613220214844, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.03910461397898584, | |
| "grad_norm": 2.494783878326416, | |
| "kl": 12.536182403564453, | |
| "learning_rate": 9.063844375723014e-05, | |
| "logits/chosen": -29469715.555555556, | |
| "logits/rejected": -35742962.28571428, | |
| "logps/chosen": -398.61485460069446, | |
| "logps/rejected": -526.6864885602679, | |
| "loss": 0.0136, | |
| "rewards/chosen": 10.75872802734375, | |
| "rewards/margins": 25.600702558244976, | |
| "rewards/rejected": -14.841974530901227, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.039470077661032436, | |
| "grad_norm": 2.447875499725342, | |
| "kl": 1.5802021026611328, | |
| "learning_rate": 9.045084971874738e-05, | |
| "logits/chosen": -40079336.72727273, | |
| "logits/rejected": -35073746.28571428, | |
| "logps/chosen": -425.91996626420456, | |
| "logps/rejected": -501.5933314732143, | |
| "loss": 0.0051, | |
| "rewards/chosen": 11.33784970370206, | |
| "rewards/margins": 27.476164087072597, | |
| "rewards/rejected": -16.138314383370535, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.03983554134307903, | |
| "grad_norm": 4.472402095794678, | |
| "kl": 7.777850151062012, | |
| "learning_rate": 9.02615929042678e-05, | |
| "logits/chosen": -39484612.92307692, | |
| "logits/rejected": -46556890.94736842, | |
| "logps/chosen": -351.8323317307692, | |
| "logps/rejected": -418.0727025082237, | |
| "loss": 0.0158, | |
| "rewards/chosen": 9.19132056603065, | |
| "rewards/margins": 22.550823258002275, | |
| "rewards/rejected": -13.359502691971628, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.04020100502512563, | |
| "grad_norm": 4.362159729003906, | |
| "kl": 9.24830436706543, | |
| "learning_rate": 9.007068109339784e-05, | |
| "logits/chosen": -34914912.0, | |
| "logits/rejected": -53206777.6, | |
| "logps/chosen": -442.7127574573864, | |
| "logps/rejected": -573.83603515625, | |
| "loss": 0.018, | |
| "rewards/chosen": 9.137822931463068, | |
| "rewards/margins": 21.902453058416192, | |
| "rewards/rejected": -12.764630126953126, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.040566468707172225, | |
| "grad_norm": 2.3844363689422607, | |
| "kl": 3.848033905029297, | |
| "learning_rate": 8.987812213377424e-05, | |
| "logits/chosen": -41293036.307692304, | |
| "logits/rejected": -32670403.36842105, | |
| "logps/chosen": -384.64734825721155, | |
| "logps/rejected": -461.97085731907896, | |
| "loss": 0.0095, | |
| "rewards/chosen": 8.858161926269531, | |
| "rewards/margins": 23.253070630525286, | |
| "rewards/rejected": -14.394908704255757, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.04093193238921882, | |
| "grad_norm": 4.107156753540039, | |
| "kl": 4.59416389465332, | |
| "learning_rate": 8.968392394074164e-05, | |
| "logits/chosen": -24259356.23529412, | |
| "logits/rejected": -40533777.06666667, | |
| "logps/chosen": -305.0940946691176, | |
| "logps/rejected": -534.0719401041666, | |
| "loss": 0.0231, | |
| "rewards/chosen": 6.991543938131893, | |
| "rewards/margins": 18.158666333965225, | |
| "rewards/rejected": -11.167122395833333, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.04129739607126542, | |
| "grad_norm": 3.683842658996582, | |
| "kl": 6.614081382751465, | |
| "learning_rate": 8.948809449702711e-05, | |
| "logits/chosen": -31701672.0, | |
| "logits/rejected": -47852736.0, | |
| "logps/chosen": -347.9965515136719, | |
| "logps/rejected": -484.1363830566406, | |
| "loss": 0.0063, | |
| "rewards/chosen": 9.465049743652344, | |
| "rewards/margins": 24.32558536529541, | |
| "rewards/rejected": -14.860535621643066, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.04166285975331201, | |
| "grad_norm": 4.799642086029053, | |
| "kl": 4.523487567901611, | |
| "learning_rate": 8.929064185241213e-05, | |
| "logits/chosen": -37363976.53333333, | |
| "logits/rejected": -27903503.05882353, | |
| "logps/chosen": -223.542919921875, | |
| "logps/rejected": -515.1665900735294, | |
| "loss": 0.0242, | |
| "rewards/chosen": 6.066276041666667, | |
| "rewards/margins": 18.198430618585327, | |
| "rewards/rejected": -12.132154576918659, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.04202832343535861, | |
| "grad_norm": 1.5732982158660889, | |
| "kl": 5.359951972961426, | |
| "learning_rate": 8.90915741234015e-05, | |
| "logits/chosen": -37210144.0, | |
| "logits/rejected": -49808716.0, | |
| "logps/chosen": -354.8924560546875, | |
| "logps/rejected": -565.97607421875, | |
| "loss": 0.0098, | |
| "rewards/chosen": 9.922457695007324, | |
| "rewards/margins": 25.488935470581055, | |
| "rewards/rejected": -15.56647777557373, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.042393787117405206, | |
| "grad_norm": 2.5431289672851562, | |
| "kl": 8.323074340820312, | |
| "learning_rate": 8.889089949288986e-05, | |
| "logits/chosen": -32031748.0, | |
| "logits/rejected": -37969088.0, | |
| "logps/chosen": -331.563232421875, | |
| "logps/rejected": -426.9198913574219, | |
| "loss": 0.0026, | |
| "rewards/chosen": 9.050741195678711, | |
| "rewards/margins": 20.511554718017578, | |
| "rewards/rejected": -11.460813522338867, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.0427592507994518, | |
| "grad_norm": 2.4684252738952637, | |
| "kl": 6.228133678436279, | |
| "learning_rate": 8.868862620982534e-05, | |
| "logits/chosen": -26141841.777777776, | |
| "logits/rejected": -40421668.571428575, | |
| "logps/chosen": -383.61431206597223, | |
| "logps/rejected": -528.8011997767857, | |
| "loss": 0.0142, | |
| "rewards/chosen": 7.288478427463108, | |
| "rewards/margins": 20.678058200412327, | |
| "rewards/rejected": -13.389579772949219, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.0431247144814984, | |
| "grad_norm": 3.3828604221343994, | |
| "kl": 9.635725021362305, | |
| "learning_rate": 8.848476258887031e-05, | |
| "logits/chosen": -34122983.61904762, | |
| "logits/rejected": -34187534.54545455, | |
| "logps/chosen": -324.4385695684524, | |
| "logps/rejected": -425.9974254261364, | |
| "loss": 0.0221, | |
| "rewards/chosen": 7.775054205031622, | |
| "rewards/margins": 22.53334250181784, | |
| "rewards/rejected": -14.75828829678622, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.043490178163544994, | |
| "grad_norm": 18.695188522338867, | |
| "kl": 3.486945629119873, | |
| "learning_rate": 8.827931701005974e-05, | |
| "logits/chosen": -32471527.384615384, | |
| "logits/rejected": -31171927.57894737, | |
| "logps/chosen": -384.1367938701923, | |
| "logps/rejected": -510.8258634868421, | |
| "loss": 0.0168, | |
| "rewards/chosen": 9.219330420860878, | |
| "rewards/margins": 21.40822137994805, | |
| "rewards/rejected": -12.188890959087171, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.0438556418455916, | |
| "grad_norm": 1.4896206855773926, | |
| "kl": 7.4243879318237305, | |
| "learning_rate": 8.807229791845673e-05, | |
| "logits/chosen": -22926546.82352941, | |
| "logits/rejected": -39986184.53333333, | |
| "logps/chosen": -313.80905330882354, | |
| "logps/rejected": -493.6060546875, | |
| "loss": 0.0138, | |
| "rewards/chosen": 8.630747178021599, | |
| "rewards/margins": 22.445148423138786, | |
| "rewards/rejected": -13.814401245117187, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.044221105527638194, | |
| "grad_norm": 15.042485237121582, | |
| "kl": 7.030808925628662, | |
| "learning_rate": 8.786371382380528e-05, | |
| "logits/chosen": -22164546.90909091, | |
| "logits/rejected": -33053568.0, | |
| "logps/chosen": -372.3087269176136, | |
| "logps/rejected": -480.8042689732143, | |
| "loss": 0.0353, | |
| "rewards/chosen": 8.92121748490767, | |
| "rewards/margins": 20.267590543408414, | |
| "rewards/rejected": -11.346373058500744, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.04458656920968479, | |
| "grad_norm": 4.385624885559082, | |
| "kl": 13.904373168945312, | |
| "learning_rate": 8.765357330018056e-05, | |
| "logits/chosen": -13394754.0, | |
| "logits/rejected": -32771898.0, | |
| "logps/chosen": -385.470947265625, | |
| "logps/rejected": -507.3829345703125, | |
| "loss": 0.0187, | |
| "rewards/chosen": 8.200074195861816, | |
| "rewards/margins": 22.01417636871338, | |
| "rewards/rejected": -13.814102172851562, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.044952032891731386, | |
| "grad_norm": 2.888279914855957, | |
| "kl": 3.44219970703125, | |
| "learning_rate": 8.744188498563641e-05, | |
| "logits/chosen": -29559440.94117647, | |
| "logits/rejected": -32810423.466666665, | |
| "logps/chosen": -322.3883846507353, | |
| "logps/rejected": -476.08134765625, | |
| "loss": 0.0096, | |
| "rewards/chosen": 8.27079413918888, | |
| "rewards/margins": 21.863239004097736, | |
| "rewards/rejected": -13.592444864908854, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.04531749657377798, | |
| "grad_norm": 7.156754016876221, | |
| "kl": 11.657984733581543, | |
| "learning_rate": 8.722865758185035e-05, | |
| "logits/chosen": -29726720.0, | |
| "logits/rejected": -26009784.470588237, | |
| "logps/chosen": -405.5867513020833, | |
| "logps/rejected": -462.2352079503676, | |
| "loss": 0.0228, | |
| "rewards/chosen": 9.056148274739583, | |
| "rewards/margins": 19.708162614411, | |
| "rewards/rejected": -10.652014339671416, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.04568296025582458, | |
| "grad_norm": 1.9241881370544434, | |
| "kl": 10.992962837219238, | |
| "learning_rate": 8.701389985376578e-05, | |
| "logits/chosen": -33494584.888888888, | |
| "logits/rejected": -28275019.42857143, | |
| "logps/chosen": -390.51502821180554, | |
| "logps/rejected": -432.02085658482144, | |
| "loss": 0.0201, | |
| "rewards/chosen": 9.984032524956596, | |
| "rewards/margins": 25.02918219187903, | |
| "rewards/rejected": -15.045149666922432, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.046048423937871175, | |
| "grad_norm": 4.758550643920898, | |
| "kl": 9.395109176635742, | |
| "learning_rate": 8.679762062923175e-05, | |
| "logits/chosen": -11328824.888888888, | |
| "logits/rejected": -49017302.85714286, | |
| "logps/chosen": -386.6030544704861, | |
| "logps/rejected": -573.0137765066964, | |
| "loss": 0.0175, | |
| "rewards/chosen": 8.314485337999132, | |
| "rewards/margins": 20.924610198490203, | |
| "rewards/rejected": -12.610124860491071, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.04641388761991777, | |
| "grad_norm": 3.189591884613037, | |
| "kl": 5.045682907104492, | |
| "learning_rate": 8.657982879864007e-05, | |
| "logits/chosen": -52660928.0, | |
| "logits/rejected": -22908872.0, | |
| "logps/chosen": -363.4823404947917, | |
| "logps/rejected": -432.112939453125, | |
| "loss": 0.0127, | |
| "rewards/chosen": 8.550245920817057, | |
| "rewards/margins": 21.691986338297525, | |
| "rewards/rejected": -13.141740417480468, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.04677935130196437, | |
| "grad_norm": 7.768436908721924, | |
| "kl": 4.893695831298828, | |
| "learning_rate": 8.636053331455987e-05, | |
| "logits/chosen": -19024720.0, | |
| "logits/rejected": -11870157.333333334, | |
| "logps/chosen": -340.36279296875, | |
| "logps/rejected": -477.72802734375, | |
| "loss": 0.0169, | |
| "rewards/chosen": 6.748569488525391, | |
| "rewards/margins": 21.24854405721029, | |
| "rewards/rejected": -14.499974568684896, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.04714481498401096, | |
| "grad_norm": 6.462328910827637, | |
| "kl": 7.855134010314941, | |
| "learning_rate": 8.613974319136958e-05, | |
| "logits/chosen": -32957366.85714286, | |
| "logits/rejected": -34900357.81818182, | |
| "logps/chosen": -345.69256882440476, | |
| "logps/rejected": -376.31613991477275, | |
| "loss": 0.0174, | |
| "rewards/chosen": 7.068138485863095, | |
| "rewards/margins": 19.82476773612943, | |
| "rewards/rejected": -12.756629250266336, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.04751027866605756, | |
| "grad_norm": 6.749807834625244, | |
| "kl": 6.279943466186523, | |
| "learning_rate": 8.591746750488639e-05, | |
| "logits/chosen": -31112480.0, | |
| "logits/rejected": -37449440.0, | |
| "logps/chosen": -328.711083984375, | |
| "logps/rejected": -521.7969563802084, | |
| "loss": 0.0145, | |
| "rewards/chosen": 8.407637786865234, | |
| "rewards/margins": 21.947111002604167, | |
| "rewards/rejected": -13.539473215738932, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.047875742348104156, | |
| "grad_norm": 10.204842567443848, | |
| "kl": 1.7673616409301758, | |
| "learning_rate": 8.569371539199316e-05, | |
| "logits/chosen": -37636050.666666664, | |
| "logits/rejected": -25786320.0, | |
| "logps/chosen": -376.6614990234375, | |
| "logps/rejected": -473.077490234375, | |
| "loss": 0.0098, | |
| "rewards/chosen": 9.883068084716797, | |
| "rewards/margins": 22.04599838256836, | |
| "rewards/rejected": -12.162930297851563, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.04824120603015075, | |
| "grad_norm": 1.5196152925491333, | |
| "kl": 6.041738986968994, | |
| "learning_rate": 8.54684960502629e-05, | |
| "logits/chosen": -34432869.64705882, | |
| "logits/rejected": -33060744.533333335, | |
| "logps/chosen": -297.23609834558823, | |
| "logps/rejected": -432.93837890625, | |
| "loss": 0.0086, | |
| "rewards/chosen": 8.036726110121784, | |
| "rewards/margins": 20.355531041762408, | |
| "rewards/rejected": -12.318804931640624, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.04860666971219735, | |
| "grad_norm": 4.4139933586120605, | |
| "kl": 5.9731903076171875, | |
| "learning_rate": 8.524181873758059e-05, | |
| "logits/chosen": -29148693.333333332, | |
| "logits/rejected": -34617346.28571428, | |
| "logps/chosen": -313.3556315104167, | |
| "logps/rejected": -440.9218052455357, | |
| "loss": 0.0093, | |
| "rewards/chosen": 8.365788777669271, | |
| "rewards/margins": 22.307918730236235, | |
| "rewards/rejected": -13.942129952566964, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.048972133394243944, | |
| "grad_norm": 4.669734954833984, | |
| "kl": 4.484433174133301, | |
| "learning_rate": 8.501369277176276e-05, | |
| "logits/chosen": -37621051.07692308, | |
| "logits/rejected": -51850731.78947368, | |
| "logps/chosen": -378.05262169471155, | |
| "logps/rejected": -357.8472964638158, | |
| "loss": 0.0139, | |
| "rewards/chosen": 8.4474851168119, | |
| "rewards/margins": 22.600776517922096, | |
| "rewards/rejected": -14.153291401110197, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.04933759707629054, | |
| "grad_norm": 3.8269076347351074, | |
| "kl": 3.0405588150024414, | |
| "learning_rate": 8.478412753017433e-05, | |
| "logits/chosen": -50635735.27272727, | |
| "logits/rejected": -40710640.76190476, | |
| "logps/chosen": -336.33686967329544, | |
| "logps/rejected": -490.99595424107144, | |
| "loss": 0.0131, | |
| "rewards/chosen": 7.602625760165128, | |
| "rewards/margins": 25.30353460270605, | |
| "rewards/rejected": -17.700908842540922, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.04970306075833714, | |
| "grad_norm": 4.64284086227417, | |
| "kl": 4.288601875305176, | |
| "learning_rate": 8.455313244934324e-05, | |
| "logits/chosen": -37124844.307692304, | |
| "logits/rejected": -31651226.94736842, | |
| "logps/chosen": -368.30724158653845, | |
| "logps/rejected": -519.6214021381579, | |
| "loss": 0.0083, | |
| "rewards/chosen": 8.17648432804988, | |
| "rewards/margins": 26.35454797165596, | |
| "rewards/rejected": -18.178063643606084, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.05006852444038374, | |
| "grad_norm": 9.166915893554688, | |
| "kl": 3.1957833766937256, | |
| "learning_rate": 8.432071702457252e-05, | |
| "logits/chosen": -19971820.0, | |
| "logits/rejected": -39954416.0, | |
| "logps/chosen": -392.2398986816406, | |
| "logps/rejected": -356.62542724609375, | |
| "loss": 0.0161, | |
| "rewards/chosen": 8.67671012878418, | |
| "rewards/margins": 20.90134620666504, | |
| "rewards/rejected": -12.22463607788086, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.050433988122430336, | |
| "grad_norm": 5.984962463378906, | |
| "kl": 0.09490013122558594, | |
| "learning_rate": 8.408689080954998e-05, | |
| "logits/chosen": -56916085.333333336, | |
| "logits/rejected": -34009542.4, | |
| "logps/chosen": -371.3211669921875, | |
| "logps/rejected": -439.77919921875, | |
| "loss": 0.0094, | |
| "rewards/chosen": 5.444234212239583, | |
| "rewards/margins": 20.338495381673177, | |
| "rewards/rejected": -14.894261169433594, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.05079945180447693, | |
| "grad_norm": 0.4479733109474182, | |
| "kl": 0.0, | |
| "learning_rate": 8.385166341595548e-05, | |
| "logits/chosen": -23358480.0, | |
| "logits/rejected": -36381641.14285714, | |
| "logps/chosen": -302.4280894886364, | |
| "logps/rejected": -490.02715773809524, | |
| "loss": 0.0008, | |
| "rewards/chosen": 8.983969254927201, | |
| "rewards/margins": 25.035687830541043, | |
| "rewards/rejected": -16.05171857561384, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.05116491548652353, | |
| "grad_norm": 6.6932549476623535, | |
| "kl": 5.087161540985107, | |
| "learning_rate": 8.361504451306585e-05, | |
| "logits/chosen": -19172888.0, | |
| "logits/rejected": -29464570.666666668, | |
| "logps/chosen": -281.700341796875, | |
| "logps/rejected": -372.93115234375, | |
| "loss": 0.0127, | |
| "rewards/chosen": 7.251024627685547, | |
| "rewards/margins": 19.892823282877604, | |
| "rewards/rejected": -12.641798655192057, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.051530379168570124, | |
| "grad_norm": 5.902066707611084, | |
| "kl": 6.660229682922363, | |
| "learning_rate": 8.33770438273574e-05, | |
| "logits/chosen": -39327835.428571425, | |
| "logits/rejected": -27682455.272727273, | |
| "logps/chosen": -321.85907273065476, | |
| "logps/rejected": -416.8302556818182, | |
| "loss": 0.0169, | |
| "rewards/chosen": 7.136968703497024, | |
| "rewards/margins": 21.812885895435944, | |
| "rewards/rejected": -14.67591719193892, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.05189584285061672, | |
| "grad_norm": 2.5956599712371826, | |
| "kl": 3.9250850677490234, | |
| "learning_rate": 8.313767114210615e-05, | |
| "logits/chosen": -40608861.538461536, | |
| "logits/rejected": -36192471.578947365, | |
| "logps/chosen": -405.92709585336536, | |
| "logps/rejected": -483.7523643092105, | |
| "loss": 0.0045, | |
| "rewards/chosen": 8.45544198843149, | |
| "rewards/margins": 26.898499400026886, | |
| "rewards/rejected": -18.443057411595394, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.05226130653266332, | |
| "grad_norm": 1.836310863494873, | |
| "kl": 2.898988723754883, | |
| "learning_rate": 8.289693629698564e-05, | |
| "logits/chosen": -33169533.333333332, | |
| "logits/rejected": -47413168.0, | |
| "logps/chosen": -370.8831380208333, | |
| "logps/rejected": -609.94736328125, | |
| "loss": 0.0018, | |
| "rewards/chosen": 10.799910227457682, | |
| "rewards/margins": 28.98655573527018, | |
| "rewards/rejected": -18.1866455078125, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.05262677021470991, | |
| "grad_norm": 5.187747955322266, | |
| "kl": 8.429890632629395, | |
| "learning_rate": 8.265484918766243e-05, | |
| "logits/chosen": -20554602.0, | |
| "logits/rejected": -31753056.0, | |
| "logps/chosen": -316.80816650390625, | |
| "logps/rejected": -437.7784729003906, | |
| "loss": 0.0163, | |
| "rewards/chosen": 9.590449333190918, | |
| "rewards/margins": 23.719423294067383, | |
| "rewards/rejected": -14.128973960876465, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.05299223389675651, | |
| "grad_norm": 5.367190837860107, | |
| "kl": 11.206666946411133, | |
| "learning_rate": 8.241141976538943e-05, | |
| "logits/chosen": -40488704.0, | |
| "logits/rejected": -37375149.333333336, | |
| "logps/chosen": -336.3726806640625, | |
| "logps/rejected": -526.9472249348959, | |
| "loss": 0.0318, | |
| "rewards/chosen": 8.726722717285156, | |
| "rewards/margins": 23.846473693847656, | |
| "rewards/rejected": -15.1197509765625, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.053357697578803105, | |
| "grad_norm": 6.1537933349609375, | |
| "kl": 7.488029956817627, | |
| "learning_rate": 8.216665803659671e-05, | |
| "logits/chosen": -17490802.285714287, | |
| "logits/rejected": -24749454.545454547, | |
| "logps/chosen": -302.71609933035717, | |
| "logps/rejected": -321.91195401278407, | |
| "loss": 0.0174, | |
| "rewards/chosen": 7.857398623511905, | |
| "rewards/margins": 18.530006045386905, | |
| "rewards/rejected": -10.672607421875, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.0537231612608497, | |
| "grad_norm": 23.973215103149414, | |
| "kl": 8.004353523254395, | |
| "learning_rate": 8.192057406248028e-05, | |
| "logits/chosen": -11659069.714285715, | |
| "logits/rejected": -31665980.444444444, | |
| "logps/chosen": -329.85323660714283, | |
| "logps/rejected": -498.25352647569446, | |
| "loss": 0.0289, | |
| "rewards/chosen": 9.294958932059151, | |
| "rewards/margins": 24.97127423967634, | |
| "rewards/rejected": -15.676315307617188, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.0540886249428963, | |
| "grad_norm": 3.193537712097168, | |
| "kl": 0.8649396896362305, | |
| "learning_rate": 8.167317795858851e-05, | |
| "logits/chosen": -14098165.714285715, | |
| "logits/rejected": -19797928.888888888, | |
| "logps/chosen": -238.59490094866072, | |
| "logps/rejected": -433.498779296875, | |
| "loss": 0.0145, | |
| "rewards/chosen": 5.340590340750558, | |
| "rewards/margins": 18.65028780982608, | |
| "rewards/rejected": -13.309697469075521, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.054454088624942894, | |
| "grad_norm": 4.464662551879883, | |
| "kl": 8.388834953308105, | |
| "learning_rate": 8.142447989440618e-05, | |
| "logits/chosen": -18256884.0, | |
| "logits/rejected": -26674926.0, | |
| "logps/chosen": -288.3346862792969, | |
| "logps/rejected": -408.85137939453125, | |
| "loss": 0.018, | |
| "rewards/chosen": 8.224873542785645, | |
| "rewards/margins": 17.814136505126953, | |
| "rewards/rejected": -9.589262962341309, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.05481955230698949, | |
| "grad_norm": 15.688850402832031, | |
| "kl": 7.512658596038818, | |
| "learning_rate": 8.117449009293668e-05, | |
| "logits/chosen": -24875326.11764706, | |
| "logits/rejected": -23949489.066666666, | |
| "logps/chosen": -285.2714269301471, | |
| "logps/rejected": -423.3251953125, | |
| "loss": 0.0363, | |
| "rewards/chosen": 8.441160314223346, | |
| "rewards/margins": 17.377709183038448, | |
| "rewards/rejected": -8.936548868815104, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.055185015989036086, | |
| "grad_norm": 18.866296768188477, | |
| "kl": 6.525827407836914, | |
| "learning_rate": 8.092321883028158e-05, | |
| "logits/chosen": -22583564.0, | |
| "logits/rejected": -32450880.0, | |
| "logps/chosen": -287.40631103515625, | |
| "logps/rejected": -471.9981384277344, | |
| "loss": 0.0304, | |
| "rewards/chosen": 8.566713333129883, | |
| "rewards/margins": 19.275400161743164, | |
| "rewards/rejected": -10.708686828613281, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.05555047967108269, | |
| "grad_norm": 3.900456428527832, | |
| "kl": 11.181682586669922, | |
| "learning_rate": 8.067067643521834e-05, | |
| "logits/chosen": -15145715.555555556, | |
| "logits/rejected": -12945529.142857144, | |
| "logps/chosen": -271.58203125, | |
| "logps/rejected": -495.61366489955356, | |
| "loss": 0.0086, | |
| "rewards/chosen": 9.396018134223091, | |
| "rewards/margins": 21.07727510966952, | |
| "rewards/rejected": -11.681256975446429, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.055915943353129285, | |
| "grad_norm": 1.2899030447006226, | |
| "kl": 6.685637950897217, | |
| "learning_rate": 8.041687328877567e-05, | |
| "logits/chosen": -11986612.266666668, | |
| "logits/rejected": -19600841.411764707, | |
| "logps/chosen": -345.98743489583336, | |
| "logps/rejected": -457.33820657169116, | |
| "loss": 0.0076, | |
| "rewards/chosen": 10.541527303059896, | |
| "rewards/margins": 22.469056133195465, | |
| "rewards/rejected": -11.92752883013557, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.05628140703517588, | |
| "grad_norm": 3.640664577484131, | |
| "kl": 6.153097152709961, | |
| "learning_rate": 8.016181982380682e-05, | |
| "logits/chosen": -27638136.0, | |
| "logits/rejected": -14189388.8, | |
| "logps/chosen": -393.5046793619792, | |
| "logps/rejected": -428.01044921875, | |
| "loss": 0.0074, | |
| "rewards/chosen": 9.385032018025717, | |
| "rewards/margins": 21.475312169392904, | |
| "rewards/rejected": -12.090280151367187, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.05664687071722248, | |
| "grad_norm": 2.055399179458618, | |
| "kl": 7.720605850219727, | |
| "learning_rate": 7.990552652456081e-05, | |
| "logits/chosen": -20083619.555555556, | |
| "logits/rejected": -25948745.14285714, | |
| "logps/chosen": -345.41004774305554, | |
| "logps/rejected": -467.7896205357143, | |
| "loss": 0.0151, | |
| "rewards/chosen": 9.123803880479601, | |
| "rewards/margins": 20.44527822827536, | |
| "rewards/rejected": -11.321474347795759, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.057012334399269074, | |
| "grad_norm": 2.521709442138672, | |
| "kl": 4.136674880981445, | |
| "learning_rate": 7.964800392625129e-05, | |
| "logits/chosen": -24411598.769230768, | |
| "logits/rejected": -23131924.210526317, | |
| "logps/chosen": -372.10745943509613, | |
| "logps/rejected": -459.2008634868421, | |
| "loss": 0.0066, | |
| "rewards/chosen": 8.230770404522236, | |
| "rewards/margins": 21.248242521092962, | |
| "rewards/rejected": -13.017472116570724, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.05737779808131567, | |
| "grad_norm": 2.4811270236968994, | |
| "kl": 4.052708625793457, | |
| "learning_rate": 7.938926261462366e-05, | |
| "logits/chosen": -26443162.181818184, | |
| "logits/rejected": -26732790.85714286, | |
| "logps/chosen": -279.29030539772725, | |
| "logps/rejected": -473.4995349702381, | |
| "loss": 0.0031, | |
| "rewards/chosen": 8.906252774325283, | |
| "rewards/margins": 22.31977467722707, | |
| "rewards/rejected": -13.413521902901786, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.057743261763362266, | |
| "grad_norm": 23.89385986328125, | |
| "kl": 7.045736312866211, | |
| "learning_rate": 7.91293132255198e-05, | |
| "logits/chosen": -28469895.529411763, | |
| "logits/rejected": -35620768.0, | |
| "logps/chosen": -331.1364315257353, | |
| "logps/rejected": -513.88671875, | |
| "loss": 0.0232, | |
| "rewards/chosen": 10.059974221622243, | |
| "rewards/margins": 26.979096536075367, | |
| "rewards/rejected": -16.919122314453126, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.05810872544540886, | |
| "grad_norm": 0.40293624997138977, | |
| "kl": 7.891809463500977, | |
| "learning_rate": 7.886816644444098e-05, | |
| "logits/chosen": -20793583.157894738, | |
| "logits/rejected": -41698372.92307692, | |
| "logps/chosen": -325.18911903782896, | |
| "logps/rejected": -543.1114783653846, | |
| "loss": 0.0005, | |
| "rewards/chosen": 11.437842118112664, | |
| "rewards/margins": 30.14801902616555, | |
| "rewards/rejected": -18.710176908052883, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.05847418912745546, | |
| "grad_norm": 1.8891913890838623, | |
| "kl": 2.793865203857422, | |
| "learning_rate": 7.860583300610849e-05, | |
| "logits/chosen": -29260708.923076924, | |
| "logits/rejected": -18127292.63157895, | |
| "logps/chosen": -321.8430363581731, | |
| "logps/rejected": -431.8196957236842, | |
| "loss": 0.0023, | |
| "rewards/chosen": 8.246222275954027, | |
| "rewards/margins": 22.301471941866858, | |
| "rewards/rejected": -14.055249665912829, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.058839652809502055, | |
| "grad_norm": 1.9422544240951538, | |
| "kl": 12.665194511413574, | |
| "learning_rate": 7.83423236940225e-05, | |
| "logits/chosen": -28159568.0, | |
| "logits/rejected": -24138496.0, | |
| "logps/chosen": -394.5556884765625, | |
| "logps/rejected": -578.223876953125, | |
| "loss": 0.008, | |
| "rewards/chosen": 11.073470306396484, | |
| "rewards/margins": 31.373287709554035, | |
| "rewards/rejected": -20.29981740315755, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.05920511649154865, | |
| "grad_norm": 3.1766672134399414, | |
| "kl": 6.990899085998535, | |
| "learning_rate": 7.807764934001874e-05, | |
| "logits/chosen": -26544121.6, | |
| "logits/rejected": -19686550.666666668, | |
| "logps/chosen": -333.9852294921875, | |
| "logps/rejected": -511.3344319661458, | |
| "loss": 0.0076, | |
| "rewards/chosen": 9.996422576904298, | |
| "rewards/margins": 28.168974049886067, | |
| "rewards/rejected": -18.17255147298177, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.05957058017359525, | |
| "grad_norm": 0.21863658726215363, | |
| "kl": 0.14936065673828125, | |
| "learning_rate": 7.781182082382325e-05, | |
| "logits/chosen": -29460635.42857143, | |
| "logits/rejected": -42360552.96, | |
| "logps/chosen": -282.3582066127232, | |
| "logps/rejected": -567.3075, | |
| "loss": 0.0003, | |
| "rewards/chosen": 8.516668592180524, | |
| "rewards/margins": 33.123619275774274, | |
| "rewards/rejected": -24.60695068359375, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.05993604385564184, | |
| "grad_norm": 1.2451578378677368, | |
| "kl": 0.5987348556518555, | |
| "learning_rate": 7.754484907260513e-05, | |
| "logits/chosen": -20192930.133333333, | |
| "logits/rejected": -42657400.47058824, | |
| "logps/chosen": -346.25188802083335, | |
| "logps/rejected": -437.8046013327206, | |
| "loss": 0.0022, | |
| "rewards/chosen": 9.76473388671875, | |
| "rewards/margins": 25.446478630514704, | |
| "rewards/rejected": -15.681744743795957, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.06030150753768844, | |
| "grad_norm": 3.866580009460449, | |
| "kl": 2.7086315155029297, | |
| "learning_rate": 7.727674506052743e-05, | |
| "logits/chosen": -26440046.0, | |
| "logits/rejected": -29380092.0, | |
| "logps/chosen": -381.23895263671875, | |
| "logps/rejected": -377.08551025390625, | |
| "loss": 0.0044, | |
| "rewards/chosen": 8.652615547180176, | |
| "rewards/margins": 23.57578754425049, | |
| "rewards/rejected": -14.923171997070312, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.060666971219735036, | |
| "grad_norm": 2.844273567199707, | |
| "kl": 4.637930870056152, | |
| "learning_rate": 7.700751980829602e-05, | |
| "logits/chosen": -31675544.888888888, | |
| "logits/rejected": -27553568.0, | |
| "logps/chosen": -372.06182183159723, | |
| "logps/rejected": -462.28529575892856, | |
| "loss": 0.0158, | |
| "rewards/chosen": 9.151282416449654, | |
| "rewards/margins": 27.116133795844185, | |
| "rewards/rejected": -17.96485137939453, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.06103243490178163, | |
| "grad_norm": 3.541506767272949, | |
| "kl": 6.787299156188965, | |
| "learning_rate": 7.673718438270648e-05, | |
| "logits/chosen": -22027502.0, | |
| "logits/rejected": -34251656.0, | |
| "logps/chosen": -347.6698303222656, | |
| "logps/rejected": -606.9769897460938, | |
| "loss": 0.0039, | |
| "rewards/chosen": 11.097044944763184, | |
| "rewards/margins": 33.89655590057373, | |
| "rewards/rejected": -22.799510955810547, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.061397898583828235, | |
| "grad_norm": 0.833300769329071, | |
| "kl": 2.59067440032959, | |
| "learning_rate": 7.646574989618938e-05, | |
| "logits/chosen": -27996830.11764706, | |
| "logits/rejected": -28619340.8, | |
| "logps/chosen": -333.8934972426471, | |
| "logps/rejected": -350.493359375, | |
| "loss": 0.0007, | |
| "rewards/chosen": 11.668790031881894, | |
| "rewards/margins": 26.86366613051471, | |
| "rewards/rejected": -15.194876098632813, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.06176336226587483, | |
| "grad_norm": 3.527308702468872, | |
| "kl": 7.594015121459961, | |
| "learning_rate": 7.619322750635327e-05, | |
| "logits/chosen": -26531843.76470588, | |
| "logits/rejected": -35968247.46666667, | |
| "logps/chosen": -345.0752814797794, | |
| "logps/rejected": -437.68125, | |
| "loss": 0.0288, | |
| "rewards/chosen": 9.16690871294807, | |
| "rewards/margins": 26.81819858925015, | |
| "rewards/rejected": -17.651289876302084, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.06212882594792143, | |
| "grad_norm": 1.2971023321151733, | |
| "kl": 2.31536865234375, | |
| "learning_rate": 7.591962841552627e-05, | |
| "logits/chosen": -26244359.384615384, | |
| "logits/rejected": -45446366.315789476, | |
| "logps/chosen": -313.24350210336536, | |
| "logps/rejected": -513.1398540296053, | |
| "loss": 0.0074, | |
| "rewards/chosen": 6.6549805861253, | |
| "rewards/margins": 24.913075775270038, | |
| "rewards/rejected": -18.258095189144736, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.062494289629968024, | |
| "grad_norm": 3.557342052459717, | |
| "kl": 5.60274076461792, | |
| "learning_rate": 7.564496387029532e-05, | |
| "logits/chosen": -39386353.45454545, | |
| "logits/rejected": -20774008.38095238, | |
| "logps/chosen": -520.8385564630681, | |
| "logps/rejected": -550.8647693452381, | |
| "loss": 0.0082, | |
| "rewards/chosen": 11.914052789861506, | |
| "rewards/margins": 32.00259881618219, | |
| "rewards/rejected": -20.088546026320685, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.06285975331201461, | |
| "grad_norm": 3.5589373111724854, | |
| "kl": 5.933588981628418, | |
| "learning_rate": 7.536924516104411e-05, | |
| "logits/chosen": -27152112.0, | |
| "logits/rejected": -50107624.0, | |
| "logps/chosen": -366.93133544921875, | |
| "logps/rejected": -696.5687255859375, | |
| "loss": 0.0038, | |
| "rewards/chosen": 9.678105354309082, | |
| "rewards/margins": 32.10176181793213, | |
| "rewards/rejected": -22.423656463623047, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.06322521699406121, | |
| "grad_norm": 3.3520402908325195, | |
| "kl": 2.0572586059570312, | |
| "learning_rate": 7.509248362148889e-05, | |
| "logits/chosen": -43752658.28571428, | |
| "logits/rejected": -29103024.0, | |
| "logps/chosen": -359.9296177455357, | |
| "logps/rejected": -394.8307291666667, | |
| "loss": 0.0059, | |
| "rewards/chosen": 10.492998395647321, | |
| "rewards/margins": 25.35122559562562, | |
| "rewards/rejected": -14.858227199978298, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.0635906806761078, | |
| "grad_norm": 5.46427583694458, | |
| "kl": 10.225854873657227, | |
| "learning_rate": 7.481469062821252e-05, | |
| "logits/chosen": -16171220.705882354, | |
| "logits/rejected": -40917333.333333336, | |
| "logps/chosen": -379.10437729779414, | |
| "logps/rejected": -585.1021484375, | |
| "loss": 0.0109, | |
| "rewards/chosen": 9.008084465475644, | |
| "rewards/margins": 25.82785578709023, | |
| "rewards/rejected": -16.819771321614585, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.06395614435815442, | |
| "grad_norm": 5.1015625, | |
| "kl": 2.563922882080078, | |
| "learning_rate": 7.45358776001969e-05, | |
| "logits/chosen": -16222793.846153846, | |
| "logits/rejected": -27810910.315789472, | |
| "logps/chosen": -295.1984299879808, | |
| "logps/rejected": -432.94351356907896, | |
| "loss": 0.0109, | |
| "rewards/chosen": 7.500771155724158, | |
| "rewards/margins": 23.959707345074488, | |
| "rewards/rejected": -16.45893618935033, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.06432160804020101, | |
| "grad_norm": 3.8610777854919434, | |
| "kl": 7.177302360534668, | |
| "learning_rate": 7.425605599835361e-05, | |
| "logits/chosen": -25172003.76470588, | |
| "logits/rejected": -22787142.4, | |
| "logps/chosen": -292.3703182444853, | |
| "logps/rejected": -470.09189453125, | |
| "loss": 0.0132, | |
| "rewards/chosen": 8.342832677504596, | |
| "rewards/margins": 23.846096023858763, | |
| "rewards/rejected": -15.503263346354167, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.06468707172224761, | |
| "grad_norm": 2.0071771144866943, | |
| "kl": 4.37033748626709, | |
| "learning_rate": 7.39752373250527e-05, | |
| "logits/chosen": -22452845.47368421, | |
| "logits/rejected": -18204893.53846154, | |
| "logps/chosen": -353.97923519736844, | |
| "logps/rejected": -355.22701322115387, | |
| "loss": 0.014, | |
| "rewards/chosen": 8.854493793688322, | |
| "rewards/margins": 23.776462693928707, | |
| "rewards/rejected": -14.921968900240385, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.0650525354042942, | |
| "grad_norm": 3.627746820449829, | |
| "kl": 1.5145297050476074, | |
| "learning_rate": 7.369343312364993e-05, | |
| "logits/chosen": -15702925.090909092, | |
| "logits/rejected": -31865740.19047619, | |
| "logps/chosen": -406.5943714488636, | |
| "logps/rejected": -511.3160807291667, | |
| "loss": 0.0046, | |
| "rewards/chosen": 7.576521439985796, | |
| "rewards/margins": 28.051678678174042, | |
| "rewards/rejected": -20.475157238188245, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.0654179990863408, | |
| "grad_norm": 6.4200897216796875, | |
| "kl": 0.8539514541625977, | |
| "learning_rate": 7.34106549780123e-05, | |
| "logits/chosen": -24184929.777777776, | |
| "logits/rejected": -28082998.85714286, | |
| "logps/chosen": -268.18614366319446, | |
| "logps/rejected": -423.1952427455357, | |
| "loss": 0.0238, | |
| "rewards/chosen": 8.05920155843099, | |
| "rewards/margins": 26.53423381987072, | |
| "rewards/rejected": -18.475032261439733, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.0657834627683874, | |
| "grad_norm": 7.6310038566589355, | |
| "kl": 5.401153564453125, | |
| "learning_rate": 7.312691451204178e-05, | |
| "logits/chosen": -18243305.411764707, | |
| "logits/rejected": -31956266.666666668, | |
| "logps/chosen": -348.9458869485294, | |
| "logps/rejected": -531.7514322916667, | |
| "loss": 0.0095, | |
| "rewards/chosen": 8.305734073414522, | |
| "rewards/margins": 27.884455183440565, | |
| "rewards/rejected": -19.57872111002604, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.06614892645043399, | |
| "grad_norm": 2.335725784301758, | |
| "kl": 7.708461761474609, | |
| "learning_rate": 7.284222338919758e-05, | |
| "logits/chosen": -26012860.63157895, | |
| "logits/rejected": -42592851.692307696, | |
| "logps/chosen": -356.98671361019734, | |
| "logps/rejected": -538.6518930288462, | |
| "loss": 0.0081, | |
| "rewards/chosen": 10.63698377107319, | |
| "rewards/margins": 29.442090300895906, | |
| "rewards/rejected": -18.805106529822716, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.06651439013248059, | |
| "grad_norm": 0.36761146783828735, | |
| "kl": 0.5928888320922852, | |
| "learning_rate": 7.255659331201673e-05, | |
| "logits/chosen": -12418821.0, | |
| "logits/rejected": -28695830.0, | |
| "logps/chosen": -296.0104064941406, | |
| "logps/rejected": -399.7171630859375, | |
| "loss": 0.0005, | |
| "rewards/chosen": 8.365544319152832, | |
| "rewards/margins": 23.712489128112793, | |
| "rewards/rejected": -15.346944808959961, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.06687985381452718, | |
| "grad_norm": 3.1898019313812256, | |
| "kl": 4.412371635437012, | |
| "learning_rate": 7.227003602163295e-05, | |
| "logits/chosen": -32701537.88235294, | |
| "logits/rejected": -27279308.8, | |
| "logps/chosen": -333.3812902113971, | |
| "logps/rejected": -495.849609375, | |
| "loss": 0.0035, | |
| "rewards/chosen": 8.908136704388786, | |
| "rewards/margins": 27.76432973824295, | |
| "rewards/rejected": -18.856193033854165, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.06724531749657378, | |
| "grad_norm": 5.877505302429199, | |
| "kl": 9.530031204223633, | |
| "learning_rate": 7.198256329729412e-05, | |
| "logits/chosen": -26423022.222222224, | |
| "logits/rejected": -31291769.14285714, | |
| "logps/chosen": -414.0874294704861, | |
| "logps/rejected": -372.7887486049107, | |
| "loss": 0.0258, | |
| "rewards/chosen": 8.350198533799913, | |
| "rewards/margins": 22.83146401057168, | |
| "rewards/rejected": -14.481265476771764, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.06761078117862038, | |
| "grad_norm": 2.177922248840332, | |
| "kl": 5.663725852966309, | |
| "learning_rate": 7.169418695587791e-05, | |
| "logits/chosen": -17323242.666666668, | |
| "logits/rejected": -35728736.0, | |
| "logps/chosen": -327.0119900173611, | |
| "logps/rejected": -412.28857421875, | |
| "loss": 0.0097, | |
| "rewards/chosen": 8.765028211805555, | |
| "rewards/margins": 21.80064440530444, | |
| "rewards/rejected": -13.035616193498884, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.06797624486066697, | |
| "grad_norm": 3.550206422805786, | |
| "kl": 3.0919437408447266, | |
| "learning_rate": 7.14049188514063e-05, | |
| "logits/chosen": -32884610.133333333, | |
| "logits/rejected": -26519314.82352941, | |
| "logps/chosen": -367.80719401041665, | |
| "logps/rejected": -523.7941176470588, | |
| "loss": 0.0133, | |
| "rewards/chosen": 9.515175374348958, | |
| "rewards/margins": 26.152344707414215, | |
| "rewards/rejected": -16.63716933306526, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.06834170854271357, | |
| "grad_norm": 10.884556770324707, | |
| "kl": 1.7897157669067383, | |
| "learning_rate": 7.1114770874558e-05, | |
| "logits/chosen": -22171801.333333332, | |
| "logits/rejected": -25333996.8, | |
| "logps/chosen": -309.26092529296875, | |
| "logps/rejected": -387.01396484375, | |
| "loss": 0.0328, | |
| "rewards/chosen": 7.895773569742839, | |
| "rewards/margins": 19.213196818033854, | |
| "rewards/rejected": -11.317423248291016, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.06870717222476017, | |
| "grad_norm": 5.466808795928955, | |
| "kl": 2.5527238845825195, | |
| "learning_rate": 7.082375495217995e-05, | |
| "logits/chosen": -18060890.181818184, | |
| "logits/rejected": -30156083.80952381, | |
| "logps/chosen": -302.6913396661932, | |
| "logps/rejected": -556.6991722470239, | |
| "loss": 0.0082, | |
| "rewards/chosen": 7.865567294034091, | |
| "rewards/margins": 24.122369807519952, | |
| "rewards/rejected": -16.256802513485862, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.06907263590680676, | |
| "grad_norm": 3.0002336502075195, | |
| "kl": 8.360982894897461, | |
| "learning_rate": 7.05318830467969e-05, | |
| "logits/chosen": -16755614.11764706, | |
| "logits/rejected": -21894227.2, | |
| "logps/chosen": -408.18017578125, | |
| "logps/rejected": -459.7638346354167, | |
| "loss": 0.0056, | |
| "rewards/chosen": 9.41691140567555, | |
| "rewards/margins": 26.219946887446383, | |
| "rewards/rejected": -16.803035481770834, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.06943809958885336, | |
| "grad_norm": 4.142535209655762, | |
| "kl": 7.133042335510254, | |
| "learning_rate": 7.023916715611969e-05, | |
| "logits/chosen": -23126666.666666668, | |
| "logits/rejected": -20533851.42857143, | |
| "logps/chosen": -387.868408203125, | |
| "logps/rejected": -469.752685546875, | |
| "loss": 0.0177, | |
| "rewards/chosen": 10.944979349772135, | |
| "rewards/margins": 31.230931236630397, | |
| "rewards/rejected": -20.28595188685826, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.06980356327089995, | |
| "grad_norm": 2.85412335395813, | |
| "kl": 11.062262535095215, | |
| "learning_rate": 6.99456193125521e-05, | |
| "logits/chosen": -27307265.777777776, | |
| "logits/rejected": -28493636.57142857, | |
| "logps/chosen": -354.16937934027777, | |
| "logps/rejected": -471.00142996651783, | |
| "loss": 0.0128, | |
| "rewards/chosen": 10.214418199327257, | |
| "rewards/margins": 24.905228266640314, | |
| "rewards/rejected": -14.690810067313057, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.07016902695294655, | |
| "grad_norm": 1.3512822389602661, | |
| "kl": 1.3055610656738281, | |
| "learning_rate": 6.965125158269619e-05, | |
| "logits/chosen": -16317834.666666666, | |
| "logits/rejected": -16164295.529411765, | |
| "logps/chosen": -380.50751953125, | |
| "logps/rejected": -408.55471622242646, | |
| "loss": 0.0015, | |
| "rewards/chosen": 9.312572224934895, | |
| "rewards/margins": 24.847101967007504, | |
| "rewards/rejected": -15.53452974207261, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.07053449063499315, | |
| "grad_norm": 4.69804573059082, | |
| "kl": 4.003072738647461, | |
| "learning_rate": 6.935607606685642e-05, | |
| "logits/chosen": -11690886.0, | |
| "logits/rejected": -32503590.0, | |
| "logps/chosen": -323.9889831542969, | |
| "logps/rejected": -587.8881225585938, | |
| "loss": 0.0088, | |
| "rewards/chosen": 8.519304275512695, | |
| "rewards/margins": 24.23267364501953, | |
| "rewards/rejected": -15.713369369506836, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.07089995431703974, | |
| "grad_norm": 6.894629001617432, | |
| "kl": 4.381341934204102, | |
| "learning_rate": 6.906010489854209e-05, | |
| "logits/chosen": -15500551.384615384, | |
| "logits/rejected": -23350785.684210528, | |
| "logps/chosen": -336.66488882211536, | |
| "logps/rejected": -493.66015625, | |
| "loss": 0.01, | |
| "rewards/chosen": 9.593859159029448, | |
| "rewards/margins": 29.907944466903622, | |
| "rewards/rejected": -20.314085307874176, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.07126541799908634, | |
| "grad_norm": 4.009649276733398, | |
| "kl": 2.7879061698913574, | |
| "learning_rate": 6.876335024396872e-05, | |
| "logits/chosen": -16030878.11764706, | |
| "logits/rejected": -25318824.533333335, | |
| "logps/chosen": -379.2220243566176, | |
| "logps/rejected": -391.873046875, | |
| "loss": 0.0154, | |
| "rewards/chosen": 9.184873693129596, | |
| "rewards/margins": 24.023857864679073, | |
| "rewards/rejected": -14.838984171549479, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.07163088168113294, | |
| "grad_norm": 3.0264062881469727, | |
| "kl": 3.0212202072143555, | |
| "learning_rate": 6.846582430155783e-05, | |
| "logits/chosen": -28066490.666666668, | |
| "logits/rejected": -20612891.2, | |
| "logps/chosen": -404.5044352213542, | |
| "logps/rejected": -499.241943359375, | |
| "loss": 0.0056, | |
| "rewards/chosen": 11.594781239827475, | |
| "rewards/margins": 30.13075383504232, | |
| "rewards/rejected": -18.535972595214844, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.07199634536317953, | |
| "grad_norm": 7.040284633636475, | |
| "kl": 6.741287708282471, | |
| "learning_rate": 6.816753930143558e-05, | |
| "logits/chosen": -30354290.52631579, | |
| "logits/rejected": -21988878.769230768, | |
| "logps/chosen": -410.85572574013156, | |
| "logps/rejected": -503.72554837740387, | |
| "loss": 0.0168, | |
| "rewards/chosen": 9.378283048930921, | |
| "rewards/margins": 26.061604488233804, | |
| "rewards/rejected": -16.683321439302883, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.07236180904522613, | |
| "grad_norm": 2.600749969482422, | |
| "kl": 5.778753280639648, | |
| "learning_rate": 6.786850750493006e-05, | |
| "logits/chosen": -26646170.0, | |
| "logits/rejected": -32755876.0, | |
| "logps/chosen": -330.4010009765625, | |
| "logps/rejected": -559.676025390625, | |
| "loss": 0.0029, | |
| "rewards/chosen": 11.444527626037598, | |
| "rewards/margins": 32.67663860321045, | |
| "rewards/rejected": -21.23211097717285, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.07272727272727272, | |
| "grad_norm": 2.6039352416992188, | |
| "kl": 6.420266151428223, | |
| "learning_rate": 6.756874120406714e-05, | |
| "logits/chosen": -17844263.529411763, | |
| "logits/rejected": -29301909.333333332, | |
| "logps/chosen": -283.4960075827206, | |
| "logps/rejected": -408.2862955729167, | |
| "loss": 0.0064, | |
| "rewards/chosen": 11.534720028147978, | |
| "rewards/margins": 28.537897925283396, | |
| "rewards/rejected": -17.003177897135416, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.07309273640931932, | |
| "grad_norm": 21.8624210357666, | |
| "kl": 9.30317497253418, | |
| "learning_rate": 6.726825272106538e-05, | |
| "logits/chosen": -17542368.0, | |
| "logits/rejected": -33012054.4, | |
| "logps/chosen": -447.385009765625, | |
| "logps/rejected": -426.5275390625, | |
| "loss": 0.0312, | |
| "rewards/chosen": 11.985382080078125, | |
| "rewards/margins": 26.30437774658203, | |
| "rewards/rejected": -14.318995666503906, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07345820009136592, | |
| "grad_norm": 2.5858287811279297, | |
| "kl": 3.965498447418213, | |
| "learning_rate": 6.696705440782938e-05, | |
| "logits/chosen": -9650119.529411765, | |
| "logits/rejected": -33370368.0, | |
| "logps/chosen": -325.3095128676471, | |
| "logps/rejected": -472.2998046875, | |
| "loss": 0.0065, | |
| "rewards/chosen": 8.496898875517005, | |
| "rewards/margins": 27.648915070178465, | |
| "rewards/rejected": -19.15201619466146, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.07382366377341251, | |
| "grad_norm": 3.9915287494659424, | |
| "kl": 4.856705188751221, | |
| "learning_rate": 6.666515864544209e-05, | |
| "logits/chosen": -10674379.733333332, | |
| "logits/rejected": -26997652.70588235, | |
| "logps/chosen": -341.36578776041665, | |
| "logps/rejected": -388.1389590992647, | |
| "loss": 0.0125, | |
| "rewards/chosen": 9.1323486328125, | |
| "rewards/margins": 23.453135950425093, | |
| "rewards/rejected": -14.320787317612591, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.07418912745545911, | |
| "grad_norm": 1.1339577436447144, | |
| "kl": 8.12137222290039, | |
| "learning_rate": 6.636257784365584e-05, | |
| "logits/chosen": -28512064.0, | |
| "logits/rejected": -31698720.0, | |
| "logps/chosen": -406.0943603515625, | |
| "logps/rejected": -469.087109375, | |
| "loss": 0.0009, | |
| "rewards/chosen": 11.925963083902994, | |
| "rewards/margins": 28.539972178141277, | |
| "rewards/rejected": -16.61400909423828, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.0745545911375057, | |
| "grad_norm": 3.5026252269744873, | |
| "kl": 10.456775665283203, | |
| "learning_rate": 6.605932444038229e-05, | |
| "logits/chosen": -23463275.789473683, | |
| "logits/rejected": -25056219.076923076, | |
| "logps/chosen": -392.6201171875, | |
| "logps/rejected": -383.59487680288464, | |
| "loss": 0.0106, | |
| "rewards/chosen": 10.768471968801398, | |
| "rewards/margins": 25.321538515901757, | |
| "rewards/rejected": -14.55306654710036, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.0749200548195523, | |
| "grad_norm": 3.906362533569336, | |
| "kl": 3.322394371032715, | |
| "learning_rate": 6.575541090118105e-05, | |
| "logits/chosen": -15754039.0, | |
| "logits/rejected": -17539962.0, | |
| "logps/chosen": -350.6761169433594, | |
| "logps/rejected": -438.69488525390625, | |
| "loss": 0.0049, | |
| "rewards/chosen": 10.065705299377441, | |
| "rewards/margins": 27.962946891784668, | |
| "rewards/rejected": -17.897241592407227, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.0752855185015989, | |
| "grad_norm": 1.6618068218231201, | |
| "kl": 3.0612802505493164, | |
| "learning_rate": 6.545084971874738e-05, | |
| "logits/chosen": -17744706.0, | |
| "logits/rejected": -30604094.0, | |
| "logps/chosen": -380.76800537109375, | |
| "logps/rejected": -345.7037048339844, | |
| "loss": 0.0021, | |
| "rewards/chosen": 10.43433952331543, | |
| "rewards/margins": 23.519795417785645, | |
| "rewards/rejected": -13.085455894470215, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.07565098218364551, | |
| "grad_norm": 4.202422142028809, | |
| "kl": 0.6333751678466797, | |
| "learning_rate": 6.514565341239861e-05, | |
| "logits/chosen": -12794493.090909092, | |
| "logits/rejected": -26353200.76190476, | |
| "logps/chosen": -282.27676669034093, | |
| "logps/rejected": -562.2889694940476, | |
| "loss": 0.0028, | |
| "rewards/chosen": 10.231034712357955, | |
| "rewards/margins": 30.530086632930875, | |
| "rewards/rejected": -20.299051920572918, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.0760164458656921, | |
| "grad_norm": 10.078681945800781, | |
| "kl": 8.571239471435547, | |
| "learning_rate": 6.483983452755953e-05, | |
| "logits/chosen": -23175141.333333332, | |
| "logits/rejected": -19565435.42857143, | |
| "logps/chosen": -344.9137912326389, | |
| "logps/rejected": -472.61160714285717, | |
| "loss": 0.0271, | |
| "rewards/chosen": 9.744666205512154, | |
| "rewards/margins": 25.424937899150546, | |
| "rewards/rejected": -15.680271693638392, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.0763819095477387, | |
| "grad_norm": 3.214233160018921, | |
| "kl": 4.615579605102539, | |
| "learning_rate": 6.453340563524669e-05, | |
| "logits/chosen": -27926275.36842105, | |
| "logits/rejected": -43859387.07692308, | |
| "logps/chosen": -343.65013363486844, | |
| "logps/rejected": -648.1381460336538, | |
| "loss": 0.0144, | |
| "rewards/chosen": 9.325342278731497, | |
| "rewards/margins": 26.778057638932818, | |
| "rewards/rejected": -17.45271536020132, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.0767473732297853, | |
| "grad_norm": 4.102214813232422, | |
| "kl": 12.734009742736816, | |
| "learning_rate": 6.422637933155162e-05, | |
| "logits/chosen": -21794382.222222224, | |
| "logits/rejected": -37066509.71428572, | |
| "logps/chosen": -374.322265625, | |
| "logps/rejected": -411.320556640625, | |
| "loss": 0.0151, | |
| "rewards/chosen": 11.905857510036892, | |
| "rewards/margins": 31.9157234070793, | |
| "rewards/rejected": -20.00986589704241, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.07711283691183189, | |
| "grad_norm": 1.798668622970581, | |
| "kl": 11.159156799316406, | |
| "learning_rate": 6.391876823712317e-05, | |
| "logits/chosen": -14627946.666666666, | |
| "logits/rejected": -22159533.714285713, | |
| "logps/chosen": -348.218017578125, | |
| "logps/rejected": -406.6967075892857, | |
| "loss": 0.0084, | |
| "rewards/chosen": 10.321999443901909, | |
| "rewards/margins": 25.591409713502912, | |
| "rewards/rejected": -15.269410269601005, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.07747830059387849, | |
| "grad_norm": 1.6394319534301758, | |
| "kl": 1.3272171020507812, | |
| "learning_rate": 6.361058499664856e-05, | |
| "logits/chosen": -15764256.0, | |
| "logits/rejected": -22281297.066666666, | |
| "logps/chosen": -284.83108340992646, | |
| "logps/rejected": -419.1984049479167, | |
| "loss": 0.004, | |
| "rewards/chosen": 8.969033633961397, | |
| "rewards/margins": 27.95632407992494, | |
| "rewards/rejected": -18.98729044596354, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.07784376427592508, | |
| "grad_norm": 7.77843713760376, | |
| "kl": 9.889145851135254, | |
| "learning_rate": 6.330184227833376e-05, | |
| "logits/chosen": -25250788.266666666, | |
| "logits/rejected": -13045229.176470589, | |
| "logps/chosen": -413.92347005208336, | |
| "logps/rejected": -360.48127297794116, | |
| "loss": 0.0251, | |
| "rewards/chosen": 10.608176676432292, | |
| "rewards/margins": 25.95429448146446, | |
| "rewards/rejected": -15.346117805032168, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.07820922795797168, | |
| "grad_norm": 20.824413299560547, | |
| "kl": 14.523167610168457, | |
| "learning_rate": 6.299255277338265e-05, | |
| "logits/chosen": -19816242.285714287, | |
| "logits/rejected": -40381358.54545455, | |
| "logps/chosen": -422.2455357142857, | |
| "logps/rejected": -556.1534978693181, | |
| "loss": 0.0265, | |
| "rewards/chosen": 11.749418712797619, | |
| "rewards/margins": 30.31490111247802, | |
| "rewards/rejected": -18.5654823996804, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.07857469164001828, | |
| "grad_norm": 3.581092119216919, | |
| "kl": 3.292311668395996, | |
| "learning_rate": 6.268272919547537e-05, | |
| "logits/chosen": -24386698.0, | |
| "logits/rejected": -31041716.0, | |
| "logps/chosen": -345.5860595703125, | |
| "logps/rejected": -566.6504516601562, | |
| "loss": 0.0164, | |
| "rewards/chosen": 7.3111958503723145, | |
| "rewards/margins": 29.830772876739502, | |
| "rewards/rejected": -22.519577026367188, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.07894015532206487, | |
| "grad_norm": 0.4833795726299286, | |
| "kl": 4.088086128234863, | |
| "learning_rate": 6.237238428024572e-05, | |
| "logits/chosen": -33289578.666666668, | |
| "logits/rejected": -28853592.470588237, | |
| "logps/chosen": -423.1152669270833, | |
| "logps/rejected": -492.9146943933824, | |
| "loss": 0.0005, | |
| "rewards/chosen": 11.488334147135417, | |
| "rewards/margins": 29.890440937117035, | |
| "rewards/rejected": -18.402106789981616, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.07930561900411147, | |
| "grad_norm": 2.3684840202331543, | |
| "kl": 1.4258842468261719, | |
| "learning_rate": 6.206153078475763e-05, | |
| "logits/chosen": -25143190.4, | |
| "logits/rejected": -23845405.09090909, | |
| "logps/chosen": -348.024267578125, | |
| "logps/rejected": -462.46826171875, | |
| "loss": 0.0071, | |
| "rewards/chosen": 8.960430145263672, | |
| "rewards/margins": 26.658276991410688, | |
| "rewards/rejected": -17.697846846147016, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.07967108268615807, | |
| "grad_norm": 3.4027488231658936, | |
| "kl": 3.7008113861083984, | |
| "learning_rate": 6.175018148698077e-05, | |
| "logits/chosen": -29465239.57894737, | |
| "logits/rejected": -32449619.692307692, | |
| "logps/chosen": -399.4223889802632, | |
| "logps/rejected": -417.22164212740387, | |
| "loss": 0.0096, | |
| "rewards/chosen": 8.753948010896382, | |
| "rewards/margins": 21.67959427930083, | |
| "rewards/rejected": -12.925646268404448, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.08003654636820466, | |
| "grad_norm": 5.0857672691345215, | |
| "kl": 5.551398277282715, | |
| "learning_rate": 6.143834918526527e-05, | |
| "logits/chosen": -16367331.764705881, | |
| "logits/rejected": -24942521.6, | |
| "logps/chosen": -356.2471564797794, | |
| "logps/rejected": -417.2796875, | |
| "loss": 0.0205, | |
| "rewards/chosen": 8.824462890625, | |
| "rewards/margins": 27.034195963541666, | |
| "rewards/rejected": -18.209733072916666, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.08040201005025126, | |
| "grad_norm": 5.110848426818848, | |
| "kl": 5.129820823669434, | |
| "learning_rate": 6.112604669781572e-05, | |
| "logits/chosen": -3879200.5, | |
| "logits/rejected": -24118480.0, | |
| "logps/chosen": -329.4516906738281, | |
| "logps/rejected": -450.46142578125, | |
| "loss": 0.0129, | |
| "rewards/chosen": 8.563543319702148, | |
| "rewards/margins": 29.11154556274414, | |
| "rewards/rejected": -20.548002243041992, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.08076747373229785, | |
| "grad_norm": 1.9817887544631958, | |
| "kl": 2.890030860900879, | |
| "learning_rate": 6.081328686216418e-05, | |
| "logits/chosen": -37181917.86666667, | |
| "logits/rejected": -40966580.705882356, | |
| "logps/chosen": -413.9736653645833, | |
| "logps/rejected": -630.806640625, | |
| "loss": 0.0033, | |
| "rewards/chosen": 10.92909647623698, | |
| "rewards/margins": 36.46276634066713, | |
| "rewards/rejected": -25.53366986443015, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.08113293741434445, | |
| "grad_norm": 3.6742677688598633, | |
| "kl": 6.32096529006958, | |
| "learning_rate": 6.0500082534642464e-05, | |
| "logits/chosen": -32207366.4, | |
| "logits/rejected": -22394597.647058822, | |
| "logps/chosen": -402.03782552083334, | |
| "logps/rejected": -504.08616727941177, | |
| "loss": 0.0053, | |
| "rewards/chosen": 10.916023763020833, | |
| "rewards/margins": 30.193345971200984, | |
| "rewards/rejected": -19.27732220818015, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.08149840109639105, | |
| "grad_norm": 21.385496139526367, | |
| "kl": 2.4638442993164062, | |
| "learning_rate": 6.0186446589853784e-05, | |
| "logits/chosen": -12316193.454545455, | |
| "logits/rejected": -11932992.0, | |
| "logps/chosen": -411.81010298295456, | |
| "logps/rejected": -446.3408668154762, | |
| "loss": 0.0232, | |
| "rewards/chosen": 10.413556879216975, | |
| "rewards/margins": 26.619274420139597, | |
| "rewards/rejected": -16.20571754092262, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.08186386477843764, | |
| "grad_norm": 7.697639465332031, | |
| "kl": 10.840798377990723, | |
| "learning_rate": 5.987239192014336e-05, | |
| "logits/chosen": -15168502.857142856, | |
| "logits/rejected": -12821316.363636363, | |
| "logps/chosen": -311.9120628720238, | |
| "logps/rejected": -273.797119140625, | |
| "loss": 0.0284, | |
| "rewards/chosen": 6.746843610491071, | |
| "rewards/margins": 16.119167773754565, | |
| "rewards/rejected": -9.372324163263494, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.08222932846048424, | |
| "grad_norm": 27.4875545501709, | |
| "kl": 7.211214542388916, | |
| "learning_rate": 5.955793143506863e-05, | |
| "logits/chosen": -12500211.368421054, | |
| "logits/rejected": -20363127.384615384, | |
| "logps/chosen": -317.6380037006579, | |
| "logps/rejected": -462.81689453125, | |
| "loss": 0.0543, | |
| "rewards/chosen": 8.480968274568257, | |
| "rewards/margins": 22.93068352208929, | |
| "rewards/rejected": -14.449715247521034, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.08259479214253083, | |
| "grad_norm": 5.119180679321289, | |
| "kl": 3.8821263313293457, | |
| "learning_rate": 5.924307806086844e-05, | |
| "logits/chosen": -20646944.0, | |
| "logits/rejected": -4519772.4, | |
| "logps/chosen": -360.2757568359375, | |
| "logps/rejected": -552.817529296875, | |
| "loss": 0.0129, | |
| "rewards/chosen": 8.117130915323893, | |
| "rewards/margins": 26.899151484171547, | |
| "rewards/rejected": -18.782020568847656, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.08296025582457743, | |
| "grad_norm": 6.482161998748779, | |
| "kl": 0.0, | |
| "learning_rate": 5.8927844739931834e-05, | |
| "logits/chosen": -12243009.454545455, | |
| "logits/rejected": -21820697.904761903, | |
| "logps/chosen": -294.188720703125, | |
| "logps/rejected": -468.8477492559524, | |
| "loss": 0.0073, | |
| "rewards/chosen": 8.640039617365057, | |
| "rewards/margins": 28.237434288123985, | |
| "rewards/rejected": -19.597394670758927, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.08332571950662403, | |
| "grad_norm": 5.045980930328369, | |
| "kl": 8.78582763671875, | |
| "learning_rate": 5.861224443026595e-05, | |
| "logits/chosen": -5696805.333333333, | |
| "logits/rejected": -7406330.285714285, | |
| "logps/chosen": -302.4457736545139, | |
| "logps/rejected": -459.8894740513393, | |
| "loss": 0.0145, | |
| "rewards/chosen": 8.533818562825521, | |
| "rewards/margins": 23.901186988467263, | |
| "rewards/rejected": -15.367368425641741, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.08369118318867062, | |
| "grad_norm": 7.285205364227295, | |
| "kl": 9.07259750366211, | |
| "learning_rate": 5.82962901049634e-05, | |
| "logits/chosen": -3893539.294117647, | |
| "logits/rejected": -25990393.6, | |
| "logps/chosen": -337.59142348345586, | |
| "logps/rejected": -577.366796875, | |
| "loss": 0.0182, | |
| "rewards/chosen": 8.984709795783548, | |
| "rewards/margins": 30.35168570724188, | |
| "rewards/rejected": -21.366975911458333, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.08405664687071722, | |
| "grad_norm": 4.332226276397705, | |
| "kl": 12.022812843322754, | |
| "learning_rate": 5.7979994751668964e-05, | |
| "logits/chosen": -17081276.444444444, | |
| "logits/rejected": -16491866.285714285, | |
| "logps/chosen": -378.99175347222223, | |
| "logps/rejected": -461.9462193080357, | |
| "loss": 0.0175, | |
| "rewards/chosen": 10.123255411783854, | |
| "rewards/margins": 26.100438072567893, | |
| "rewards/rejected": -15.97718266078404, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.08442211055276382, | |
| "grad_norm": 1.7576537132263184, | |
| "kl": 4.291003227233887, | |
| "learning_rate": 5.766337137204579e-05, | |
| "logits/chosen": -18471228.23529412, | |
| "logits/rejected": -14284845.866666667, | |
| "logps/chosen": -330.1019071691176, | |
| "logps/rejected": -343.8628255208333, | |
| "loss": 0.0055, | |
| "rewards/chosen": 10.958397360409007, | |
| "rewards/margins": 23.948498475317862, | |
| "rewards/rejected": -12.990101114908855, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.08478757423481041, | |
| "grad_norm": 12.976097106933594, | |
| "kl": 0.06497049331665039, | |
| "learning_rate": 5.7346432981240904e-05, | |
| "logits/chosen": -12242513.23076923, | |
| "logits/rejected": 8392253.47368421, | |
| "logps/chosen": -276.62389197716345, | |
| "logps/rejected": -548.2805304276316, | |
| "loss": 0.0095, | |
| "rewards/chosen": 8.222402132474459, | |
| "rewards/margins": 32.36831791680834, | |
| "rewards/rejected": -24.14591578433388, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.08515303791685701, | |
| "grad_norm": 4.58160400390625, | |
| "kl": 8.429978370666504, | |
| "learning_rate": 5.7029192607350146e-05, | |
| "logits/chosen": -22118390.0, | |
| "logits/rejected": -24906762.0, | |
| "logps/chosen": -379.9720153808594, | |
| "logps/rejected": -556.9617919921875, | |
| "loss": 0.0139, | |
| "rewards/chosen": 9.560355186462402, | |
| "rewards/margins": 32.60457134246826, | |
| "rewards/rejected": -23.04421615600586, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.0855185015989036, | |
| "grad_norm": 1.507628083229065, | |
| "kl": 1.1839404106140137, | |
| "learning_rate": 5.6711663290882776e-05, | |
| "logits/chosen": -14845961.142857144, | |
| "logits/rejected": -15407089.777777778, | |
| "logps/chosen": -329.89181082589283, | |
| "logps/rejected": -493.96733940972223, | |
| "loss": 0.0055, | |
| "rewards/chosen": 9.155017307826451, | |
| "rewards/margins": 28.94087376670232, | |
| "rewards/rejected": -19.78585645887587, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.0858839652809502, | |
| "grad_norm": 3.848444938659668, | |
| "kl": 9.833124160766602, | |
| "learning_rate": 5.6393858084225305e-05, | |
| "logits/chosen": -6498912.0, | |
| "logits/rejected": -18224133.333333332, | |
| "logps/chosen": -333.9006453804348, | |
| "logps/rejected": -521.8710394965278, | |
| "loss": 0.0223, | |
| "rewards/chosen": 7.9921742314877715, | |
| "rewards/margins": 30.287640336631, | |
| "rewards/rejected": -22.29546610514323, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.0862494289629968, | |
| "grad_norm": 2.7810912132263184, | |
| "kl": 5.483455181121826, | |
| "learning_rate": 5.6075790051105023e-05, | |
| "logits/chosen": -7817208.0, | |
| "logits/rejected": -16416721.0, | |
| "logps/chosen": -362.0771484375, | |
| "logps/rejected": -472.74322509765625, | |
| "loss": 0.005, | |
| "rewards/chosen": 9.401847839355469, | |
| "rewards/margins": 31.938417434692383, | |
| "rewards/rejected": -22.536569595336914, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.08661489264504339, | |
| "grad_norm": 3.110553741455078, | |
| "kl": 3.809964656829834, | |
| "learning_rate": 5.575747226605298e-05, | |
| "logits/chosen": -8490709.05263158, | |
| "logits/rejected": -11372866.461538462, | |
| "logps/chosen": -400.8346011513158, | |
| "logps/rejected": -545.5910081129807, | |
| "loss": 0.0086, | |
| "rewards/chosen": 8.414623059724507, | |
| "rewards/margins": 33.92073361786753, | |
| "rewards/rejected": -25.50611055814303, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.08698035632708999, | |
| "grad_norm": 1.600816011428833, | |
| "kl": 2.572188377380371, | |
| "learning_rate": 5.5438917813866554e-05, | |
| "logits/chosen": -11434878.76923077, | |
| "logits/rejected": -13644350.315789474, | |
| "logps/chosen": -315.9757737379808, | |
| "logps/rejected": -508.71952097039474, | |
| "loss": 0.0016, | |
| "rewards/chosen": 10.039117666391226, | |
| "rewards/margins": 29.60621297118152, | |
| "rewards/rejected": -19.567095304790296, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.0873458200091366, | |
| "grad_norm": 2.0251121520996094, | |
| "kl": 3.4234132766723633, | |
| "learning_rate": 5.512013978907157e-05, | |
| "logits/chosen": -2543081.8666666667, | |
| "logits/rejected": -14225318.588235294, | |
| "logps/chosen": -282.37080078125, | |
| "logps/rejected": -556.6636029411765, | |
| "loss": 0.0043, | |
| "rewards/chosen": 8.273530069986979, | |
| "rewards/margins": 32.72615242752374, | |
| "rewards/rejected": -24.452622357536764, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.0877112836911832, | |
| "grad_norm": 2.7250382900238037, | |
| "kl": 5.877777099609375, | |
| "learning_rate": 5.480115129538409e-05, | |
| "logits/chosen": -8596929.142857144, | |
| "logits/rejected": -11893044.444444444, | |
| "logps/chosen": -389.1763392857143, | |
| "logps/rejected": -416.30718315972223, | |
| "loss": 0.0099, | |
| "rewards/chosen": 9.133932931082589, | |
| "rewards/margins": 28.867777264307414, | |
| "rewards/rejected": -19.733844333224827, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.08807674737322979, | |
| "grad_norm": 2.191073417663574, | |
| "kl": 3.252370595932007, | |
| "learning_rate": 5.448196544517168e-05, | |
| "logits/chosen": -16885422.0, | |
| "logits/rejected": -12586504.0, | |
| "logps/chosen": -303.47698974609375, | |
| "logps/rejected": -482.5170593261719, | |
| "loss": 0.0061, | |
| "rewards/chosen": 10.387802124023438, | |
| "rewards/margins": 31.04973602294922, | |
| "rewards/rejected": -20.66193389892578, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.08844221105527639, | |
| "grad_norm": 1.3790781497955322, | |
| "kl": 1.623072624206543, | |
| "learning_rate": 5.416259535891447e-05, | |
| "logits/chosen": -5154499.0, | |
| "logits/rejected": -19852082.0, | |
| "logps/chosen": -335.4534606933594, | |
| "logps/rejected": -567.6194458007812, | |
| "loss": 0.0022, | |
| "rewards/chosen": 10.556591987609863, | |
| "rewards/margins": 35.19700908660889, | |
| "rewards/rejected": -24.640417098999023, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.08880767473732298, | |
| "grad_norm": 7.348330497741699, | |
| "kl": 2.0587451457977295, | |
| "learning_rate": 5.384305416466584e-05, | |
| "logits/chosen": -3913020.5, | |
| "logits/rejected": -14499589.0, | |
| "logps/chosen": -292.5793762207031, | |
| "logps/rejected": -463.9847717285156, | |
| "loss": 0.007, | |
| "rewards/chosen": 7.284144878387451, | |
| "rewards/margins": 25.658551692962646, | |
| "rewards/rejected": -18.374406814575195, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.08917313841936958, | |
| "grad_norm": 1.5713036060333252, | |
| "kl": 3.138895034790039, | |
| "learning_rate": 5.35233549975127e-05, | |
| "logits/chosen": -14131090.133333333, | |
| "logits/rejected": -7379913.882352941, | |
| "logps/chosen": -321.50445963541665, | |
| "logps/rejected": -477.98747702205884, | |
| "loss": 0.0082, | |
| "rewards/chosen": 8.999294026692708, | |
| "rewards/margins": 30.62633690927543, | |
| "rewards/rejected": -21.62704288258272, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.08953860210141618, | |
| "grad_norm": 10.220050811767578, | |
| "kl": 4.970325469970703, | |
| "learning_rate": 5.320351099903565e-05, | |
| "logits/chosen": -7353914.947368421, | |
| "logits/rejected": -9241026.461538462, | |
| "logps/chosen": -322.42269736842104, | |
| "logps/rejected": -513.96875, | |
| "loss": 0.0261, | |
| "rewards/chosen": 9.780102378443667, | |
| "rewards/margins": 30.248021361316262, | |
| "rewards/rejected": -20.467918982872597, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.08990406578346277, | |
| "grad_norm": 9.520925521850586, | |
| "kl": 3.0357470512390137, | |
| "learning_rate": 5.288353531676873e-05, | |
| "logits/chosen": -10981987.0, | |
| "logits/rejected": -16637833.0, | |
| "logps/chosen": -346.5321350097656, | |
| "logps/rejected": -475.2821044921875, | |
| "loss": 0.024, | |
| "rewards/chosen": 7.337143421173096, | |
| "rewards/margins": 27.051758289337158, | |
| "rewards/rejected": -19.714614868164062, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.09026952946550937, | |
| "grad_norm": 0.9409910440444946, | |
| "kl": 1.646733283996582, | |
| "learning_rate": 5.256344110365896e-05, | |
| "logits/chosen": -9656946.133333333, | |
| "logits/rejected": -21951666.82352941, | |
| "logps/chosen": -390.64856770833336, | |
| "logps/rejected": -533.7297219669117, | |
| "loss": 0.0011, | |
| "rewards/chosen": 10.652042643229167, | |
| "rewards/margins": 32.71965343998928, | |
| "rewards/rejected": -22.06761079676011, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.09063499314755596, | |
| "grad_norm": 3.4276442527770996, | |
| "kl": 5.049632549285889, | |
| "learning_rate": 5.2243241517525754e-05, | |
| "logits/chosen": 8104922.666666667, | |
| "logits/rejected": -13931694.11764706, | |
| "logps/chosen": -311.51455078125, | |
| "logps/rejected": -416.35816865808823, | |
| "loss": 0.0276, | |
| "rewards/chosen": 8.877303059895834, | |
| "rewards/margins": 25.51912363089767, | |
| "rewards/rejected": -16.641820571001837, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.09100045682960256, | |
| "grad_norm": 2.8090860843658447, | |
| "kl": 3.980405807495117, | |
| "learning_rate": 5.192294972051992e-05, | |
| "logits/chosen": -16458994.461538462, | |
| "logits/rejected": -16097566.315789474, | |
| "logps/chosen": -326.67919921875, | |
| "logps/rejected": -503.0575657894737, | |
| "loss": 0.0045, | |
| "rewards/chosen": 10.4029294527494, | |
| "rewards/margins": 27.084651993353837, | |
| "rewards/rejected": -16.68172254060444, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.09136592051164916, | |
| "grad_norm": 4.2613701820373535, | |
| "kl": 4.3160295486450195, | |
| "learning_rate": 5.1602578878582776e-05, | |
| "logits/chosen": -13023320.0, | |
| "logits/rejected": -13718616.0, | |
| "logps/chosen": -329.45770263671875, | |
| "logps/rejected": -412.9193115234375, | |
| "loss": 0.0105, | |
| "rewards/chosen": 9.6162748336792, | |
| "rewards/margins": 26.780089378356934, | |
| "rewards/rejected": -17.163814544677734, | |
| "step": 250 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 250, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |