| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9803536345776032, |
| "eval_steps": 100, |
| "global_step": 126, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03, |
| "learning_rate": 1e-08, |
| "logits/chosen": -0.7174503803253174, |
| "logits/rejected": -0.6943084597587585, |
| "logps/chosen": -270.8456726074219, |
| "logps/rejected": -282.19085693359375, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.026765329763293266, |
| "rewards/margins": -0.03646162897348404, |
| "rewards/rejected": 0.009696293622255325, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 2e-08, |
| "logits/chosen": -0.7384400963783264, |
| "logits/rejected": -0.7221320271492004, |
| "logps/chosen": -258.61907958984375, |
| "logps/rejected": -281.6830749511719, |
| "loss": 0.6915, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.0018256169278174639, |
| "rewards/margins": 0.003697167383506894, |
| "rewards/rejected": -0.0018715504556894302, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 3e-08, |
| "logits/chosen": -0.6095221638679504, |
| "logits/rejected": -0.6629360914230347, |
| "logps/chosen": -286.78997802734375, |
| "logps/rejected": -273.3175048828125, |
| "loss": 0.6935, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.006737734191119671, |
| "rewards/margins": -0.00976959615945816, |
| "rewards/rejected": 0.0030318615026772022, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4e-08, |
| "logits/chosen": -0.6651933789253235, |
| "logits/rejected": -0.7566149830818176, |
| "logps/chosen": -235.61293029785156, |
| "logps/rejected": -243.84149169921875, |
| "loss": 0.6933, |
| "rewards/accuracies": 0.46875, |
| "rewards/chosen": 0.004669443238526583, |
| "rewards/margins": 0.0008712877752259374, |
| "rewards/rejected": 0.0037981546483933926, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 5e-08, |
| "logits/chosen": -0.6452059149742126, |
| "logits/rejected": -0.6807385683059692, |
| "logps/chosen": -252.2994384765625, |
| "logps/rejected": -251.97396850585938, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.008676717057824135, |
| "rewards/margins": 0.0009128516539931297, |
| "rewards/rejected": 0.007763866800814867, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 6e-08, |
| "logits/chosen": -0.7067058086395264, |
| "logits/rejected": -0.6684169769287109, |
| "logps/chosen": -230.00604248046875, |
| "logps/rejected": -228.9236297607422, |
| "loss": 0.6972, |
| "rewards/accuracies": 0.46875, |
| "rewards/chosen": -0.012713517993688583, |
| "rewards/margins": -0.027079811319708824, |
| "rewards/rejected": 0.014366289600729942, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 7e-08, |
| "logits/chosen": -0.7220852375030518, |
| "logits/rejected": -0.6751745939254761, |
| "logps/chosen": -232.22378540039062, |
| "logps/rejected": -241.50436401367188, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -0.0032491590827703476, |
| "rewards/margins": 0.008206035010516644, |
| "rewards/rejected": -0.01145519595593214, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 8e-08, |
| "logits/chosen": -0.6238290071487427, |
| "logits/rejected": -0.6639167070388794, |
| "logps/chosen": -234.86361694335938, |
| "logps/rejected": -214.7284698486328, |
| "loss": 0.696, |
| "rewards/accuracies": 0.46875, |
| "rewards/chosen": 0.008593764156103134, |
| "rewards/margins": -0.0219355970621109, |
| "rewards/rejected": 0.030529363080859184, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 9e-08, |
| "logits/chosen": -0.512151837348938, |
| "logits/rejected": -0.5617572069168091, |
| "logps/chosen": -189.43780517578125, |
| "logps/rejected": -214.44998168945312, |
| "loss": 0.6963, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.003417783882468939, |
| "rewards/margins": 0.009799075312912464, |
| "rewards/rejected": -0.006381290033459663, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1e-07, |
| "logits/chosen": -0.6672257781028748, |
| "logits/rejected": -0.7429981231689453, |
| "logps/chosen": -286.5278015136719, |
| "logps/rejected": -268.7224426269531, |
| "loss": 0.6886, |
| "rewards/accuracies": 0.40625, |
| "rewards/chosen": -0.0017739468021318316, |
| "rewards/margins": -0.01827608421444893, |
| "rewards/rejected": 0.0165021400898695, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.0999999999999999e-07, |
| "logits/chosen": -0.7219868302345276, |
| "logits/rejected": -0.791577160358429, |
| "logps/chosen": -247.4084930419922, |
| "logps/rejected": -248.8292236328125, |
| "loss": 0.6962, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": 0.019455352798104286, |
| "rewards/margins": 0.021109547466039658, |
| "rewards/rejected": -0.0016541974619030952, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.2e-07, |
| "logits/chosen": -0.6311923861503601, |
| "logits/rejected": -0.7258697748184204, |
| "logps/chosen": -331.468017578125, |
| "logps/rejected": -303.3436279296875, |
| "loss": 0.6955, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": 0.005338191986083984, |
| "rewards/margins": -0.0022792313247919083, |
| "rewards/rejected": 0.0076174261048436165, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3e-07, |
| "logits/chosen": -0.6106204390525818, |
| "logits/rejected": -0.6809769868850708, |
| "logps/chosen": -271.89508056640625, |
| "logps/rejected": -248.4736328125, |
| "loss": 0.6959, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.004141634795814753, |
| "rewards/margins": -0.019872283563017845, |
| "rewards/rejected": 0.01573064923286438, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.4e-07, |
| "logits/chosen": -0.6956904530525208, |
| "logits/rejected": -0.7332131862640381, |
| "logps/chosen": -228.54507446289062, |
| "logps/rejected": -234.7319793701172, |
| "loss": 0.6912, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": 0.0003057911526411772, |
| "rewards/margins": 0.01566055603325367, |
| "rewards/rejected": -0.015354765579104424, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.5e-07, |
| "logits/chosen": -0.6350907683372498, |
| "logits/rejected": -0.5792465806007385, |
| "logps/chosen": -265.60833740234375, |
| "logps/rejected": -280.8433837890625, |
| "loss": 0.6906, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.017145343124866486, |
| "rewards/margins": -0.023011833429336548, |
| "rewards/rejected": 0.005866494029760361, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.6e-07, |
| "logits/chosen": -0.6439922451972961, |
| "logits/rejected": -0.6449224352836609, |
| "logps/chosen": -255.7445068359375, |
| "logps/rejected": -224.29718017578125, |
| "loss": 0.6966, |
| "rewards/accuracies": 0.46875, |
| "rewards/chosen": 0.006082021631300449, |
| "rewards/margins": -0.008805789053440094, |
| "rewards/rejected": 0.014887811616063118, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 1.7000000000000001e-07, |
| "logits/chosen": -0.6101562976837158, |
| "logits/rejected": -0.699042558670044, |
| "logps/chosen": -211.84664916992188, |
| "logps/rejected": -225.85964965820312, |
| "loss": 0.6973, |
| "rewards/accuracies": 0.40625, |
| "rewards/chosen": 0.0015809923643246293, |
| "rewards/margins": -0.008058879524469376, |
| "rewards/rejected": 0.009639870375394821, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 1.8e-07, |
| "logits/chosen": -0.5861420631408691, |
| "logits/rejected": -0.717140793800354, |
| "logps/chosen": -202.82980346679688, |
| "logps/rejected": -201.53016662597656, |
| "loss": 0.688, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.011665353551506996, |
| "rewards/margins": 0.01814911887049675, |
| "rewards/rejected": -0.006483766250312328, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 1.8999999999999998e-07, |
| "logits/chosen": -0.5231435298919678, |
| "logits/rejected": -0.6139582395553589, |
| "logps/chosen": -247.10012817382812, |
| "logps/rejected": -256.91961669921875, |
| "loss": 0.6956, |
| "rewards/accuracies": 0.40625, |
| "rewards/chosen": -0.021320294588804245, |
| "rewards/margins": -0.023092210292816162, |
| "rewards/rejected": 0.0017719138413667679, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 2e-07, |
| "logits/chosen": -0.527732253074646, |
| "logits/rejected": -0.588649332523346, |
| "logps/chosen": -256.5250549316406, |
| "logps/rejected": -260.16650390625, |
| "loss": 0.6906, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0021958230063319206, |
| "rewards/margins": 0.023003416135907173, |
| "rewards/rejected": -0.02519924007356167, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.0999999999999997e-07, |
| "logits/chosen": -0.6151208281517029, |
| "logits/rejected": -0.721298098564148, |
| "logps/chosen": -189.48403930664062, |
| "logps/rejected": -204.5369110107422, |
| "loss": 0.6936, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.014158200472593307, |
| "rewards/margins": -0.01336708664894104, |
| "rewards/rejected": -0.0007911152206361294, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.1999999999999998e-07, |
| "logits/chosen": -0.6183785200119019, |
| "logits/rejected": -0.6386263370513916, |
| "logps/chosen": -267.54571533203125, |
| "logps/rejected": -264.0722351074219, |
| "loss": 0.6915, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.025133097544312477, |
| "rewards/margins": 0.010925769805908203, |
| "rewards/rejected": 0.014207325875759125, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2.3e-07, |
| "logits/chosen": -0.6037736535072327, |
| "logits/rejected": -0.5792754292488098, |
| "logps/chosen": -258.5186462402344, |
| "logps/rejected": -264.8337097167969, |
| "loss": 0.6967, |
| "rewards/accuracies": 0.46875, |
| "rewards/chosen": -0.008844601921737194, |
| "rewards/margins": 0.003293616697192192, |
| "rewards/rejected": -0.012138217687606812, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 2.4e-07, |
| "logits/chosen": -0.6004233360290527, |
| "logits/rejected": -0.6128482222557068, |
| "logps/chosen": -165.5045623779297, |
| "logps/rejected": -165.1356201171875, |
| "loss": 0.6965, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.0016356302658095956, |
| "rewards/margins": -0.011843698099255562, |
| "rewards/rejected": 0.0102080674842, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.5e-07, |
| "logits/chosen": -0.46822789311408997, |
| "logits/rejected": -0.6019323468208313, |
| "logps/chosen": -250.05691528320312, |
| "logps/rejected": -254.96939086914062, |
| "loss": 0.6909, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.005440211854875088, |
| "rewards/margins": 0.008583704940974712, |
| "rewards/rejected": -0.014023915864527225, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 2.6e-07, |
| "logits/chosen": -0.6268489360809326, |
| "logits/rejected": -0.6111986041069031, |
| "logps/chosen": -219.6685791015625, |
| "logps/rejected": -228.7408447265625, |
| "loss": 0.6876, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.013573385775089264, |
| "rewards/margins": 0.01851603016257286, |
| "rewards/rejected": -0.004942642990499735, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 2.7e-07, |
| "logits/chosen": -0.6629205942153931, |
| "logits/rejected": -0.6896684765815735, |
| "logps/chosen": -270.8996887207031, |
| "logps/rejected": -271.5982971191406, |
| "loss": 0.6873, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": 0.0039288876578211784, |
| "rewards/margins": 0.02803238481283188, |
| "rewards/rejected": -0.024103496223688126, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 2.8e-07, |
| "logits/chosen": -0.6165743470191956, |
| "logits/rejected": -0.653393566608429, |
| "logps/chosen": -300.4944152832031, |
| "logps/rejected": -282.7535705566406, |
| "loss": 0.6859, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": 0.007847840897738934, |
| "rewards/margins": 0.030035195872187614, |
| "rewards/rejected": -0.022187354043126106, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 2.9e-07, |
| "logits/chosen": -0.6094356179237366, |
| "logits/rejected": -0.6757500171661377, |
| "logps/chosen": -277.4384765625, |
| "logps/rejected": -250.0402069091797, |
| "loss": 0.6881, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -0.005152473691850901, |
| "rewards/margins": 0.008621355518698692, |
| "rewards/rejected": -0.013773828744888306, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 3e-07, |
| "logits/chosen": -0.6621364951133728, |
| "logits/rejected": -0.7544094920158386, |
| "logps/chosen": -237.76211547851562, |
| "logps/rejected": -258.6220397949219, |
| "loss": 0.6857, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0019231722690165043, |
| "rewards/margins": 0.023751741275191307, |
| "rewards/rejected": -0.025674916803836823, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.1e-07, |
| "logits/chosen": -0.7078163623809814, |
| "logits/rejected": -0.7576731443405151, |
| "logps/chosen": -244.79898071289062, |
| "logps/rejected": -254.35121154785156, |
| "loss": 0.6861, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -0.039019349962472916, |
| "rewards/margins": 0.013844037428498268, |
| "rewards/rejected": -0.052863385528326035, |
| "step": 62 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 3.2e-07, |
| "logits/chosen": -0.6455057263374329, |
| "logits/rejected": -0.6837339401245117, |
| "logps/chosen": -312.5347595214844, |
| "logps/rejected": -332.4432678222656, |
| "loss": 0.6769, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.010168202221393585, |
| "rewards/margins": 0.05173782259225845, |
| "rewards/rejected": -0.04156962037086487, |
| "step": 64 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 3.3e-07, |
| "logits/chosen": -0.6815846562385559, |
| "logits/rejected": -0.6511753797531128, |
| "logps/chosen": -293.84759521484375, |
| "logps/rejected": -291.41094970703125, |
| "loss": 0.6418, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.022068671882152557, |
| "rewards/margins": 0.11345849931240082, |
| "rewards/rejected": -0.09138982743024826, |
| "step": 66 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 3.4000000000000003e-07, |
| "logits/chosen": -0.6528204679489136, |
| "logits/rejected": -0.6717909574508667, |
| "logps/chosen": -322.498779296875, |
| "logps/rejected": -328.1177978515625, |
| "loss": 0.644, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.007202749140560627, |
| "rewards/margins": 0.10125970840454102, |
| "rewards/rejected": -0.09405697137117386, |
| "step": 68 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 3.5e-07, |
| "logits/chosen": -0.6534676551818848, |
| "logits/rejected": -0.7868615388870239, |
| "logps/chosen": -374.2117004394531, |
| "logps/rejected": -334.72003173828125, |
| "loss": 0.6375, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.026431433856487274, |
| "rewards/margins": 0.12101886421442032, |
| "rewards/rejected": -0.09458744525909424, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 3.6e-07, |
| "logits/chosen": -0.7391141653060913, |
| "logits/rejected": -0.7862444519996643, |
| "logps/chosen": -411.762939453125, |
| "logps/rejected": -365.6845703125, |
| "loss": 0.6212, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": 0.054456405341625214, |
| "rewards/margins": 0.18117623031139374, |
| "rewards/rejected": -0.12671984732151031, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 3.7e-07, |
| "logits/chosen": -0.7733412981033325, |
| "logits/rejected": -0.7517014741897583, |
| "logps/chosen": -290.02618408203125, |
| "logps/rejected": -289.328857421875, |
| "loss": 0.6198, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 0.009054643101990223, |
| "rewards/margins": 0.17434418201446533, |
| "rewards/rejected": -0.16528955101966858, |
| "step": 74 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 3.7999999999999996e-07, |
| "logits/chosen": -0.7194028496742249, |
| "logits/rejected": -0.7934839725494385, |
| "logps/chosen": -257.1732482910156, |
| "logps/rejected": -249.44032287597656, |
| "loss": 0.6133, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": 0.011389782652258873, |
| "rewards/margins": 0.1557338535785675, |
| "rewards/rejected": -0.14434409141540527, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 3.8999999999999997e-07, |
| "logits/chosen": -0.5935586094856262, |
| "logits/rejected": -0.6306675672531128, |
| "logps/chosen": -271.8302917480469, |
| "logps/rejected": -261.1759033203125, |
| "loss": 0.603, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": 0.007451685611158609, |
| "rewards/margins": 0.20253950357437134, |
| "rewards/rejected": -0.19508780539035797, |
| "step": 78 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 4e-07, |
| "logits/chosen": -0.7403210997581482, |
| "logits/rejected": -0.7706661224365234, |
| "logps/chosen": -312.74578857421875, |
| "logps/rejected": -313.32806396484375, |
| "loss": 0.5993, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 0.023077581077814102, |
| "rewards/margins": 0.24176748096942902, |
| "rewards/rejected": -0.21868988871574402, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 4.0999999999999994e-07, |
| "logits/chosen": -0.699599027633667, |
| "logits/rejected": -0.6987614631652832, |
| "logps/chosen": -237.9510498046875, |
| "logps/rejected": -219.32186889648438, |
| "loss": 0.609, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.008079749532043934, |
| "rewards/margins": 0.1820226013660431, |
| "rewards/rejected": -0.17394284904003143, |
| "step": 82 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 4.1999999999999995e-07, |
| "logits/chosen": -0.7053198218345642, |
| "logits/rejected": -0.7387905716896057, |
| "logps/chosen": -334.8210144042969, |
| "logps/rejected": -317.2434387207031, |
| "loss": 0.5986, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": 0.01476591732352972, |
| "rewards/margins": 0.2971000373363495, |
| "rewards/rejected": -0.2823341190814972, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 4.2999999999999996e-07, |
| "logits/chosen": -0.5891289710998535, |
| "logits/rejected": -0.6538792252540588, |
| "logps/chosen": -243.9444580078125, |
| "logps/rejected": -233.60150146484375, |
| "loss": 0.5911, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 0.008889580145478249, |
| "rewards/margins": 0.27189671993255615, |
| "rewards/rejected": -0.26300713419914246, |
| "step": 86 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 4.3999999999999997e-07, |
| "logits/chosen": -0.5605867505073547, |
| "logits/rejected": -0.6283904910087585, |
| "logps/chosen": -225.13677978515625, |
| "logps/rejected": -225.9215545654297, |
| "loss": 0.5902, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.01296391524374485, |
| "rewards/margins": 0.19775745272636414, |
| "rewards/rejected": -0.21072134375572205, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 4.5e-07, |
| "logits/chosen": -0.5663571357727051, |
| "logits/rejected": -0.7296346426010132, |
| "logps/chosen": -267.0323791503906, |
| "logps/rejected": -252.5331573486328, |
| "loss": 0.5751, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.1021631732583046, |
| "rewards/margins": 0.20906060934066772, |
| "rewards/rejected": -0.3112238049507141, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 4.6e-07, |
| "logits/chosen": -0.7122434973716736, |
| "logits/rejected": -0.8210177421569824, |
| "logps/chosen": -271.46282958984375, |
| "logps/rejected": -260.20928955078125, |
| "loss": 0.5581, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.01567010208964348, |
| "rewards/margins": 0.3229547441005707, |
| "rewards/rejected": -0.33862486481666565, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 4.6999999999999995e-07, |
| "logits/chosen": -0.7021597027778625, |
| "logits/rejected": -0.7676805853843689, |
| "logps/chosen": -292.62701416015625, |
| "logps/rejected": -298.1327819824219, |
| "loss": 0.5636, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.03594281151890755, |
| "rewards/margins": 0.29724758863449097, |
| "rewards/rejected": -0.3331904113292694, |
| "step": 94 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 4.8e-07, |
| "logits/chosen": -0.6902323365211487, |
| "logits/rejected": -0.7200605869293213, |
| "logps/chosen": -211.65472412109375, |
| "logps/rejected": -219.32192993164062, |
| "loss": 0.5759, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.07997211813926697, |
| "rewards/margins": 0.2292398363351822, |
| "rewards/rejected": -0.30921196937561035, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 4.9e-07, |
| "logits/chosen": -0.5574191808700562, |
| "logits/rejected": -0.5770147442817688, |
| "logps/chosen": -207.34197998046875, |
| "logps/rejected": -224.09259033203125, |
| "loss": 0.5567, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.09851586073637009, |
| "rewards/margins": 0.3251160979270935, |
| "rewards/rejected": -0.4236319661140442, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 5e-07, |
| "logits/chosen": -0.7693199515342712, |
| "logits/rejected": -0.8233805894851685, |
| "logps/chosen": -299.7784118652344, |
| "logps/rejected": -309.6235046386719, |
| "loss": 0.5627, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.08704928308725357, |
| "rewards/margins": 0.44622179865837097, |
| "rewards/rejected": -0.533271074295044, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.57, |
| "eval_logits/chosen": -0.6139631867408752, |
| "eval_logits/rejected": -0.5974639654159546, |
| "eval_logps/chosen": -152.44834899902344, |
| "eval_logps/rejected": -153.41506958007812, |
| "eval_loss": 0.586656928062439, |
| "eval_rewards/accuracies": 0.692307710647583, |
| "eval_rewards/chosen": -0.2592608332633972, |
| "eval_rewards/margins": 0.06354018300771713, |
| "eval_rewards/rejected": -0.32280105352401733, |
| "eval_runtime": 18.621, |
| "eval_samples_per_second": 5.37, |
| "eval_steps_per_second": 0.698, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 4.92735454356513e-07, |
| "logits/chosen": -0.6150973439216614, |
| "logits/rejected": -0.7185875177383423, |
| "logps/chosen": -237.0604705810547, |
| "logps/rejected": -249.4574737548828, |
| "loss": 0.5567, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.263325035572052, |
| "rewards/margins": 0.26326507329940796, |
| "rewards/rejected": -0.52659010887146, |
| "step": 102 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 4.7136400641330245e-07, |
| "logits/chosen": -0.559939980506897, |
| "logits/rejected": -0.5313056111335754, |
| "logps/chosen": -210.3485565185547, |
| "logps/rejected": -234.06793212890625, |
| "loss": 0.5509, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.17846447229385376, |
| "rewards/margins": 0.37211543321609497, |
| "rewards/rejected": -0.5505799651145935, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 4.3712768704277524e-07, |
| "logits/chosen": -0.6157305240631104, |
| "logits/rejected": -0.6810495853424072, |
| "logps/chosen": -282.2988586425781, |
| "logps/rejected": -283.375732421875, |
| "loss": 0.5324, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.2017279416322708, |
| "rewards/margins": 0.47284698486328125, |
| "rewards/rejected": -0.6745749115943909, |
| "step": 106 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 3.920161866827889e-07, |
| "logits/chosen": -0.6584359407424927, |
| "logits/rejected": -0.7091121077537537, |
| "logps/chosen": -277.4266052246094, |
| "logps/rejected": -246.23489379882812, |
| "loss": 0.5346, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.23659390211105347, |
| "rewards/margins": 0.4666748046875, |
| "rewards/rejected": -0.7032687664031982, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 3.3865122176063385e-07, |
| "logits/chosen": -0.6134750247001648, |
| "logits/rejected": -0.5988832712173462, |
| "logps/chosen": -226.0696258544922, |
| "logps/rejected": -246.9676513671875, |
| "loss": 0.5041, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.2491825819015503, |
| "rewards/margins": 0.45121854543685913, |
| "rewards/rejected": -0.7004011273384094, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 2.801341700638307e-07, |
| "logits/chosen": -0.7020338177680969, |
| "logits/rejected": -0.7422819137573242, |
| "logps/chosen": -281.707763671875, |
| "logps/rejected": -252.8227081298828, |
| "loss": 0.5283, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.23370790481567383, |
| "rewards/margins": 0.40553176403045654, |
| "rewards/rejected": -0.6392396688461304, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 2.1986582993616925e-07, |
| "logits/chosen": -0.6352089047431946, |
| "logits/rejected": -0.6219425797462463, |
| "logps/chosen": -208.7581787109375, |
| "logps/rejected": -234.14163208007812, |
| "loss": 0.5487, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.3383124768733978, |
| "rewards/margins": 0.26009026169776917, |
| "rewards/rejected": -0.598402738571167, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.6134877823936607e-07, |
| "logits/chosen": -0.5805491209030151, |
| "logits/rejected": -0.682640552520752, |
| "logps/chosen": -186.20004272460938, |
| "logps/rejected": -197.09063720703125, |
| "loss": 0.5498, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.35932108759880066, |
| "rewards/margins": 0.39937281608581543, |
| "rewards/rejected": -0.7586938738822937, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 1.0798381331721107e-07, |
| "logits/chosen": -0.6779636740684509, |
| "logits/rejected": -0.7133126258850098, |
| "logps/chosen": -251.22116088867188, |
| "logps/rejected": -237.37893676757812, |
| "loss": 0.5179, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.2686101198196411, |
| "rewards/margins": 0.4475557804107666, |
| "rewards/rejected": -0.7161659002304077, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 6.28723129572247e-08, |
| "logits/chosen": -0.7219228148460388, |
| "logits/rejected": -0.6949824094772339, |
| "logps/chosen": -231.49847412109375, |
| "logps/rejected": -238.30551147460938, |
| "loss": 0.5349, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.2211376279592514, |
| "rewards/margins": 0.44752269983291626, |
| "rewards/rejected": -0.6686602830886841, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 2.863599358669755e-08, |
| "logits/chosen": -0.6804690957069397, |
| "logits/rejected": -0.6980258226394653, |
| "logps/chosen": -268.3139953613281, |
| "logps/rejected": -313.7171936035156, |
| "loss": 0.5252, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.29031863808631897, |
| "rewards/margins": 0.511509895324707, |
| "rewards/rejected": -0.8018285632133484, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 7.2645456434869965e-09, |
| "logits/chosen": -0.8130983710289001, |
| "logits/rejected": -0.8794119358062744, |
| "logps/chosen": -248.53091430664062, |
| "logps/rejected": -248.52560424804688, |
| "loss": 0.523, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.32957252860069275, |
| "rewards/margins": 0.4259745180606842, |
| "rewards/rejected": -0.7555469274520874, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 0.0, |
| "logits/chosen": -0.629798173904419, |
| "logits/rejected": -0.635986328125, |
| "logps/chosen": -281.06219482421875, |
| "logps/rejected": -273.4628601074219, |
| "loss": 0.5123, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.40146714448928833, |
| "rewards/margins": 0.5194033980369568, |
| "rewards/rejected": -0.9208705425262451, |
| "step": 126 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 126, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 50000, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|