Instructions to use ConicCat/Test-LoRA with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ConicCat/Test-LoRA with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("ConicCat/Test-LoRA", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 182, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005494505494505495, | |
| "grad_norm": 2.0219836235046387, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -0.29912787675857544, | |
| "logits/rejected": -0.26033276319503784, | |
| "logps/chosen": -1493.069580078125, | |
| "logps/rejected": -1464.670654296875, | |
| "loss": 0.6931473016738892, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.01098901098901099, | |
| "grad_norm": 1.2694443464279175, | |
| "learning_rate": 1.3157894736842107e-07, | |
| "logits/chosen": -0.5480956435203552, | |
| "logits/rejected": -0.6893957853317261, | |
| "logps/chosen": -1354.66455078125, | |
| "logps/rejected": -1234.7755126953125, | |
| "loss": 0.6931473016738892, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.016483516483516484, | |
| "grad_norm": 1.7238407135009766, | |
| "learning_rate": 2.6315789473684213e-07, | |
| "logits/chosen": -0.27911028265953064, | |
| "logits/rejected": -0.146735280752182, | |
| "logps/chosen": -1381.031982421875, | |
| "logps/rejected": -1174.5887451171875, | |
| "loss": 0.6932641267776489, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": 0.002829666016623378, | |
| "rewards/margins": 0.001636037603020668, | |
| "rewards/rejected": 0.001193628297187388, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.02197802197802198, | |
| "grad_norm": 1.3951466083526611, | |
| "learning_rate": 3.9473684210526315e-07, | |
| "logits/chosen": -0.168782040476799, | |
| "logits/rejected": -0.25047171115875244, | |
| "logps/chosen": -1239.2984619140625, | |
| "logps/rejected": -1150.9674072265625, | |
| "loss": 0.6944670081138611, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": 0.0013842176413163543, | |
| "rewards/margins": -0.0002266278606839478, | |
| "rewards/rejected": 0.0016108450945466757, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.027472527472527472, | |
| "grad_norm": 1.1389007568359375, | |
| "learning_rate": 5.263157894736843e-07, | |
| "logits/chosen": -0.6456503868103027, | |
| "logits/rejected": -0.6371781229972839, | |
| "logps/chosen": -1409.6326904296875, | |
| "logps/rejected": -1272.1806640625, | |
| "loss": 0.6931549906730652, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.00566960358992219, | |
| "rewards/margins": 0.0016335974214598536, | |
| "rewards/rejected": 0.004036006983369589, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.03296703296703297, | |
| "grad_norm": 0.974063515663147, | |
| "learning_rate": 6.578947368421053e-07, | |
| "logits/chosen": -0.30573540925979614, | |
| "logits/rejected": -0.2751460373401642, | |
| "logps/chosen": -1334.066650390625, | |
| "logps/rejected": -1179.54248046875, | |
| "loss": 0.6933455467224121, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.015271691605448723, | |
| "rewards/margins": 0.002894277684390545, | |
| "rewards/rejected": 0.012377413921058178, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.038461538461538464, | |
| "grad_norm": 1.2145659923553467, | |
| "learning_rate": 7.894736842105263e-07, | |
| "logits/chosen": 0.048936627805233, | |
| "logits/rejected": -0.05937391147017479, | |
| "logps/chosen": -1329.2144775390625, | |
| "logps/rejected": -1263.015869140625, | |
| "loss": 0.6919921040534973, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.012406377121806145, | |
| "rewards/margins": 0.0037618777714669704, | |
| "rewards/rejected": 0.008644499816000462, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.04395604395604396, | |
| "grad_norm": 1.1337213516235352, | |
| "learning_rate": 9.210526315789474e-07, | |
| "logits/chosen": -0.6898252964019775, | |
| "logits/rejected": -0.5947282314300537, | |
| "logps/chosen": -1531.9923095703125, | |
| "logps/rejected": -1458.8387451171875, | |
| "loss": 0.692238450050354, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.010655415244400501, | |
| "rewards/margins": 0.006124755833297968, | |
| "rewards/rejected": 0.004530658945441246, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.04945054945054945, | |
| "grad_norm": 1.4570075273513794, | |
| "learning_rate": 1.0526315789473685e-06, | |
| "logits/chosen": -0.6079831123352051, | |
| "logits/rejected": -0.6475998759269714, | |
| "logps/chosen": -1284.917236328125, | |
| "logps/rejected": -1239.587158203125, | |
| "loss": 0.6913315653800964, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.008226280100643635, | |
| "rewards/margins": 0.00454014353454113, | |
| "rewards/rejected": 0.0036861367989331484, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.054945054945054944, | |
| "grad_norm": 1.1168543100357056, | |
| "learning_rate": 1.1842105263157894e-06, | |
| "logits/chosen": -0.4832429587841034, | |
| "logits/rejected": -0.6784942150115967, | |
| "logps/chosen": -1411.264404296875, | |
| "logps/rejected": -1368.5498046875, | |
| "loss": 0.6921623349189758, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.017078913748264313, | |
| "rewards/margins": 0.003607435617595911, | |
| "rewards/rejected": 0.013471478596329689, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.06043956043956044, | |
| "grad_norm": 1.6089200973510742, | |
| "learning_rate": 1.3157894736842106e-06, | |
| "logits/chosen": -0.3044881820678711, | |
| "logits/rejected": -0.4217117428779602, | |
| "logps/chosen": -1790.7430419921875, | |
| "logps/rejected": -1541.77587890625, | |
| "loss": 0.6924711465835571, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.017292898148298264, | |
| "rewards/margins": 0.0005286832456476986, | |
| "rewards/rejected": 0.016764217987656593, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.06593406593406594, | |
| "grad_norm": 0.6998999714851379, | |
| "learning_rate": 1.4473684210526317e-06, | |
| "logits/chosen": -0.484161376953125, | |
| "logits/rejected": -0.4741247892379761, | |
| "logps/chosen": -1405.0679931640625, | |
| "logps/rejected": -1307.798095703125, | |
| "loss": 0.6923574209213257, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.03834030032157898, | |
| "rewards/margins": 0.016229379922151566, | |
| "rewards/rejected": 0.022110918536782265, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.07142857142857142, | |
| "grad_norm": 0.7959760427474976, | |
| "learning_rate": 1.5789473684210526e-06, | |
| "logits/chosen": -0.35663121938705444, | |
| "logits/rejected": -0.44265735149383545, | |
| "logps/chosen": -1244.1324462890625, | |
| "logps/rejected": -1132.490478515625, | |
| "loss": 0.6914532780647278, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.0250022504478693, | |
| "rewards/margins": 0.010114078409969807, | |
| "rewards/rejected": 0.014888172969222069, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.07692307692307693, | |
| "grad_norm": 0.8386003971099854, | |
| "learning_rate": 1.710526315789474e-06, | |
| "logits/chosen": -0.19147101044654846, | |
| "logits/rejected": -0.1299954354763031, | |
| "logps/chosen": -1534.652099609375, | |
| "logps/rejected": -1399.5792236328125, | |
| "loss": 0.6917384266853333, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.04474790394306183, | |
| "rewards/margins": 0.02114131860435009, | |
| "rewards/rejected": 0.023606587201356888, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.08241758241758242, | |
| "grad_norm": 0.7331187725067139, | |
| "learning_rate": 1.8421052631578948e-06, | |
| "logits/chosen": -0.444730281829834, | |
| "logits/rejected": -0.5467236638069153, | |
| "logps/chosen": -1175.93017578125, | |
| "logps/rejected": -1152.0045166015625, | |
| "loss": 0.6896647214889526, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.01556220930069685, | |
| "rewards/margins": 0.008490944281220436, | |
| "rewards/rejected": 0.007071265950798988, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.08791208791208792, | |
| "grad_norm": 0.7852977514266968, | |
| "learning_rate": 1.973684210526316e-06, | |
| "logits/chosen": -0.3976231515407562, | |
| "logits/rejected": -0.6108076572418213, | |
| "logps/chosen": -1739.89404296875, | |
| "logps/rejected": -1463.2957763671875, | |
| "loss": 0.6904494166374207, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.04959982633590698, | |
| "rewards/margins": 0.015834834426641464, | |
| "rewards/rejected": 0.03376498818397522, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.09340659340659341, | |
| "grad_norm": 0.6691881418228149, | |
| "learning_rate": 2.105263157894737e-06, | |
| "logits/chosen": -0.35132962465286255, | |
| "logits/rejected": -0.4285813570022583, | |
| "logps/chosen": -1459.74755859375, | |
| "logps/rejected": -1310.574951171875, | |
| "loss": 0.6907796263694763, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.03758728504180908, | |
| "rewards/margins": 0.011371154338121414, | |
| "rewards/rejected": 0.026216136291623116, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0989010989010989, | |
| "grad_norm": 0.8749127984046936, | |
| "learning_rate": 2.236842105263158e-06, | |
| "logits/chosen": -0.4039854407310486, | |
| "logits/rejected": -0.5412226319313049, | |
| "logps/chosen": -1535.715087890625, | |
| "logps/rejected": -1592.76123046875, | |
| "loss": 0.6916483044624329, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.029426174238324165, | |
| "rewards/margins": 0.004588168114423752, | |
| "rewards/rejected": 0.024838006123900414, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.1043956043956044, | |
| "grad_norm": 0.6890782713890076, | |
| "learning_rate": 2.368421052631579e-06, | |
| "logits/chosen": -0.5154158473014832, | |
| "logits/rejected": -0.4239461421966553, | |
| "logps/chosen": -1366.4837646484375, | |
| "logps/rejected": -1341.562744140625, | |
| "loss": 0.6925311088562012, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.02481229603290558, | |
| "rewards/margins": 0.014975612983107567, | |
| "rewards/rejected": 0.009836683049798012, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.10989010989010989, | |
| "grad_norm": 0.7121235728263855, | |
| "learning_rate": 2.5e-06, | |
| "logits/chosen": -0.24524424970149994, | |
| "logits/rejected": -0.1629214882850647, | |
| "logps/chosen": -1579.9754638671875, | |
| "logps/rejected": -1489.90625, | |
| "loss": 0.6927945613861084, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.008585358038544655, | |
| "rewards/margins": 0.009461937472224236, | |
| "rewards/rejected": -0.0008765789680182934, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.11538461538461539, | |
| "grad_norm": 0.7799815535545349, | |
| "learning_rate": 2.484662576687117e-06, | |
| "logits/chosen": -0.7088998556137085, | |
| "logits/rejected": -0.6623613834381104, | |
| "logps/chosen": -1458.49853515625, | |
| "logps/rejected": -1424.711181640625, | |
| "loss": 0.6919652819633484, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.00381654710508883, | |
| "rewards/margins": 0.008333091624081135, | |
| "rewards/rejected": -0.0045165447518229485, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.12087912087912088, | |
| "grad_norm": 0.9595739245414734, | |
| "learning_rate": 2.4693251533742334e-06, | |
| "logits/chosen": -0.18136945366859436, | |
| "logits/rejected": -0.2704053819179535, | |
| "logps/chosen": -1288.148193359375, | |
| "logps/rejected": -1242.779296875, | |
| "loss": 0.6891352534294128, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.004841099493205547, | |
| "rewards/margins": 0.010067347437143326, | |
| "rewards/rejected": -0.014908447861671448, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.12637362637362637, | |
| "grad_norm": 0.8795596957206726, | |
| "learning_rate": 2.45398773006135e-06, | |
| "logits/chosen": -0.5372273325920105, | |
| "logits/rejected": -0.4621075391769409, | |
| "logps/chosen": -1187.1552734375, | |
| "logps/rejected": -1210.7431640625, | |
| "loss": 0.6919819116592407, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.02427225187420845, | |
| "rewards/margins": 0.0051599410362541676, | |
| "rewards/rejected": -0.02943219244480133, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.13186813186813187, | |
| "grad_norm": 0.4124164581298828, | |
| "learning_rate": 2.4386503067484667e-06, | |
| "logits/chosen": -0.6080984473228455, | |
| "logits/rejected": -0.6881457567214966, | |
| "logps/chosen": -1473.2374267578125, | |
| "logps/rejected": -1214.7386474609375, | |
| "loss": 0.6928576231002808, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.050905995070934296, | |
| "rewards/margins": -0.004748010542243719, | |
| "rewards/rejected": -0.04615798965096474, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.13736263736263737, | |
| "grad_norm": 0.2888234555721283, | |
| "learning_rate": 2.423312883435583e-06, | |
| "logits/chosen": -0.31380707025527954, | |
| "logits/rejected": -0.2592722773551941, | |
| "logps/chosen": -1513.1060791015625, | |
| "logps/rejected": -1577.8150634765625, | |
| "loss": 0.6926202178001404, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.06563874334096909, | |
| "rewards/margins": 0.008732328191399574, | |
| "rewards/rejected": -0.07437106966972351, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.14285714285714285, | |
| "grad_norm": 0.5071586966514587, | |
| "learning_rate": 2.4079754601226995e-06, | |
| "logits/chosen": -0.6376692056655884, | |
| "logits/rejected": -0.6288321614265442, | |
| "logps/chosen": -1425.6273193359375, | |
| "logps/rejected": -1174.107421875, | |
| "loss": 0.6917777061462402, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.039514925330877304, | |
| "rewards/margins": -0.0018771695904433727, | |
| "rewards/rejected": -0.03763775900006294, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.14835164835164835, | |
| "grad_norm": 0.7409914135932922, | |
| "learning_rate": 2.392638036809816e-06, | |
| "logits/chosen": -0.37571296095848083, | |
| "logits/rejected": -0.3492288589477539, | |
| "logps/chosen": -1434.89697265625, | |
| "logps/rejected": -1126.47314453125, | |
| "loss": 0.6918377876281738, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.019896959885954857, | |
| "rewards/margins": 0.004404466599225998, | |
| "rewards/rejected": -0.024301424622535706, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 1.4375437498092651, | |
| "learning_rate": 2.3773006134969327e-06, | |
| "logits/chosen": -0.4284808337688446, | |
| "logits/rejected": -0.4831678867340088, | |
| "logps/chosen": -1346.5638427734375, | |
| "logps/rejected": -1136.7266845703125, | |
| "loss": 0.6918051838874817, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.022301796823740005, | |
| "rewards/margins": -0.003937081433832645, | |
| "rewards/rejected": -0.018364714458584785, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.15934065934065933, | |
| "grad_norm": 0.48184096813201904, | |
| "learning_rate": 2.3619631901840493e-06, | |
| "logits/chosen": -0.6718910336494446, | |
| "logits/rejected": -0.7271767854690552, | |
| "logps/chosen": -1507.7259521484375, | |
| "logps/rejected": -1534.1622314453125, | |
| "loss": 0.6906634569168091, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.05005672574043274, | |
| "rewards/margins": 0.0061715878546237946, | |
| "rewards/rejected": 0.043885137885808945, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.16483516483516483, | |
| "grad_norm": 0.6680644750595093, | |
| "learning_rate": 2.346625766871166e-06, | |
| "logits/chosen": -0.5794727206230164, | |
| "logits/rejected": -0.3858964741230011, | |
| "logps/chosen": -1158.666748046875, | |
| "logps/rejected": -1081.162109375, | |
| "loss": 0.6908907890319824, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.012693958356976509, | |
| "rewards/margins": 0.008182154037058353, | |
| "rewards/rejected": 0.004511804785579443, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.17032967032967034, | |
| "grad_norm": 0.9391990900039673, | |
| "learning_rate": 2.331288343558282e-06, | |
| "logits/chosen": -0.5199452638626099, | |
| "logits/rejected": -0.6326333284378052, | |
| "logps/chosen": -1681.7022705078125, | |
| "logps/rejected": -1511.8980712890625, | |
| "loss": 0.6941480040550232, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.027041777968406677, | |
| "rewards/margins": -0.001771755749359727, | |
| "rewards/rejected": 0.02881353348493576, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.17582417582417584, | |
| "grad_norm": 0.46425074338912964, | |
| "learning_rate": 2.3159509202453988e-06, | |
| "logits/chosen": -0.11656185239553452, | |
| "logits/rejected": -0.18580833077430725, | |
| "logps/chosen": -1278.0001220703125, | |
| "logps/rejected": -1106.93408203125, | |
| "loss": 0.692402720451355, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.022857239469885826, | |
| "rewards/margins": 0.0052628712728619576, | |
| "rewards/rejected": -0.02812010981142521, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.1813186813186813, | |
| "grad_norm": 0.6273348331451416, | |
| "learning_rate": 2.3006134969325154e-06, | |
| "logits/chosen": -0.5983235836029053, | |
| "logits/rejected": -0.4410645663738251, | |
| "logps/chosen": -1521.9276123046875, | |
| "logps/rejected": -1621.8740234375, | |
| "loss": 0.6908812522888184, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.011174187064170837, | |
| "rewards/margins": 0.010386137291789055, | |
| "rewards/rejected": -0.02156032808125019, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.18681318681318682, | |
| "grad_norm": 0.9936891794204712, | |
| "learning_rate": 2.2852760736196324e-06, | |
| "logits/chosen": -0.6642717719078064, | |
| "logits/rejected": -0.6868173480033875, | |
| "logps/chosen": -1817.156494140625, | |
| "logps/rejected": -1669.7921142578125, | |
| "loss": 0.6925203800201416, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.02615175023674965, | |
| "rewards/margins": -0.004008379764854908, | |
| "rewards/rejected": -0.022143369540572166, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.19230769230769232, | |
| "grad_norm": 0.297181099653244, | |
| "learning_rate": 2.2699386503067486e-06, | |
| "logits/chosen": -0.8816071152687073, | |
| "logits/rejected": -0.7729086875915527, | |
| "logps/chosen": -1493.5185546875, | |
| "logps/rejected": -1499.0738525390625, | |
| "loss": 0.6920033693313599, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.04162220656871796, | |
| "rewards/margins": 0.004699449986219406, | |
| "rewards/rejected": -0.04632166028022766, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.1978021978021978, | |
| "grad_norm": 0.6562932729721069, | |
| "learning_rate": 2.2546012269938652e-06, | |
| "logits/chosen": -0.4074722230434418, | |
| "logits/rejected": -0.5253076553344727, | |
| "logps/chosen": -1445.3853759765625, | |
| "logps/rejected": -1373.7484130859375, | |
| "loss": 0.6925094127655029, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.038453079760074615, | |
| "rewards/margins": -0.002792082028463483, | |
| "rewards/rejected": -0.0356609970331192, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.2032967032967033, | |
| "grad_norm": 0.42993712425231934, | |
| "learning_rate": 2.239263803680982e-06, | |
| "logits/chosen": -0.10876737534999847, | |
| "logits/rejected": -0.1684437394142151, | |
| "logps/chosen": -1450.543701171875, | |
| "logps/rejected": -1280.059326171875, | |
| "loss": 0.6924898624420166, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.0590565986931324, | |
| "rewards/margins": -0.012079971842467785, | |
| "rewards/rejected": -0.04697662591934204, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.2087912087912088, | |
| "grad_norm": 0.6291781067848206, | |
| "learning_rate": 2.2239263803680985e-06, | |
| "logits/chosen": -0.4873208999633789, | |
| "logits/rejected": -0.5962563753128052, | |
| "logps/chosen": -1363.3477783203125, | |
| "logps/rejected": -1316.7086181640625, | |
| "loss": 0.6922313570976257, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.03899176046252251, | |
| "rewards/margins": 0.0033729556016623974, | |
| "rewards/rejected": -0.04236472398042679, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.21428571428571427, | |
| "grad_norm": 0.6414478421211243, | |
| "learning_rate": 2.208588957055215e-06, | |
| "logits/chosen": -0.3744695782661438, | |
| "logits/rejected": -0.45585522055625916, | |
| "logps/chosen": -1323.5474853515625, | |
| "logps/rejected": -1350.0068359375, | |
| "loss": 0.6929305791854858, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.0353596955537796, | |
| "rewards/margins": 0.007274856325238943, | |
| "rewards/rejected": -0.04263455420732498, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.21978021978021978, | |
| "grad_norm": 1.121484398841858, | |
| "learning_rate": 2.1932515337423317e-06, | |
| "logits/chosen": -0.3999641537666321, | |
| "logits/rejected": -0.5990539789199829, | |
| "logps/chosen": -1318.0543212890625, | |
| "logps/rejected": -1136.1405029296875, | |
| "loss": 0.6921414136886597, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.0097041679546237, | |
| "rewards/margins": 0.002448990475386381, | |
| "rewards/rejected": -0.012153157964348793, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.22527472527472528, | |
| "grad_norm": 1.376361608505249, | |
| "learning_rate": 2.177914110429448e-06, | |
| "logits/chosen": -0.538985550403595, | |
| "logits/rejected": -0.3736581802368164, | |
| "logps/chosen": -1605.4700927734375, | |
| "logps/rejected": -1410.381103515625, | |
| "loss": 0.6900919079780579, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.015452098101377487, | |
| "rewards/margins": 0.001963262911885977, | |
| "rewards/rejected": -0.01741536147892475, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.23076923076923078, | |
| "grad_norm": 0.9494011402130127, | |
| "learning_rate": 2.1625766871165645e-06, | |
| "logits/chosen": -0.3661588430404663, | |
| "logits/rejected": -0.30248114466667175, | |
| "logps/chosen": -1482.0816650390625, | |
| "logps/rejected": -1587.1827392578125, | |
| "loss": 0.6923238635063171, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.011823957785964012, | |
| "rewards/margins": 0.004251518286764622, | |
| "rewards/rejected": -0.01607547700405121, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.23626373626373626, | |
| "grad_norm": 0.9896334409713745, | |
| "learning_rate": 2.147239263803681e-06, | |
| "logits/chosen": -0.5797193646430969, | |
| "logits/rejected": -0.6257858276367188, | |
| "logps/chosen": -1530.91552734375, | |
| "logps/rejected": -1547.6806640625, | |
| "loss": 0.6907969117164612, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.021539613604545593, | |
| "rewards/margins": 0.01467025838792324, | |
| "rewards/rejected": -0.036209866404533386, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.24175824175824176, | |
| "grad_norm": 0.9827613830566406, | |
| "learning_rate": 2.1319018404907978e-06, | |
| "logits/chosen": -0.7638057470321655, | |
| "logits/rejected": -0.8149251937866211, | |
| "logps/chosen": -1518.649169921875, | |
| "logps/rejected": -1399.217529296875, | |
| "loss": 0.6919956207275391, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.00867112074047327, | |
| "rewards/margins": 0.0037965585943311453, | |
| "rewards/rejected": -0.01246767956763506, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.24725274725274726, | |
| "grad_norm": 1.2675107717514038, | |
| "learning_rate": 2.1165644171779144e-06, | |
| "logits/chosen": -0.2081916481256485, | |
| "logits/rejected": -0.17389225959777832, | |
| "logps/chosen": -1245.876708984375, | |
| "logps/rejected": -1316.34033203125, | |
| "loss": 0.6916300654411316, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.0010032318532466888, | |
| "rewards/margins": 0.013676752336323261, | |
| "rewards/rejected": -0.012673520483076572, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.25274725274725274, | |
| "grad_norm": 0.6310904622077942, | |
| "learning_rate": 2.101226993865031e-06, | |
| "logits/chosen": -0.6055498123168945, | |
| "logits/rejected": -0.5735322833061218, | |
| "logps/chosen": -1330.6837158203125, | |
| "logps/rejected": -1323.1474609375, | |
| "loss": 0.6929526925086975, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.03848934546113014, | |
| "rewards/margins": 0.0024320981465280056, | |
| "rewards/rejected": 0.0360572449862957, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.25824175824175827, | |
| "grad_norm": 0.5979546308517456, | |
| "learning_rate": 2.085889570552147e-06, | |
| "logits/chosen": -0.19976076483726501, | |
| "logits/rejected": -0.3477155566215515, | |
| "logps/chosen": -1781.2822265625, | |
| "logps/rejected": -1497.507080078125, | |
| "loss": 0.691817045211792, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.057623643428087234, | |
| "rewards/margins": 0.01321370154619217, | |
| "rewards/rejected": 0.044409941881895065, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.26373626373626374, | |
| "grad_norm": 1.287549376487732, | |
| "learning_rate": 2.070552147239264e-06, | |
| "logits/chosen": -0.09788006544113159, | |
| "logits/rejected": -0.1335160732269287, | |
| "logps/chosen": -1499.1783447265625, | |
| "logps/rejected": -1211.41064453125, | |
| "loss": 0.6919518113136292, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.03057347610592842, | |
| "rewards/margins": 0.01761637255549431, | |
| "rewards/rejected": 0.012957105413079262, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.2692307692307692, | |
| "grad_norm": 0.6202033758163452, | |
| "learning_rate": 2.0552147239263804e-06, | |
| "logits/chosen": -0.31665268540382385, | |
| "logits/rejected": -0.3232308626174927, | |
| "logps/chosen": -1485.8748779296875, | |
| "logps/rejected": -1326.1854248046875, | |
| "loss": 0.6928448677062988, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.07038865238428116, | |
| "rewards/margins": 0.009158019907772541, | |
| "rewards/rejected": 0.061230629682540894, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.27472527472527475, | |
| "grad_norm": 0.5841394066810608, | |
| "learning_rate": 2.039877300613497e-06, | |
| "logits/chosen": -0.11269676685333252, | |
| "logits/rejected": -0.12115436792373657, | |
| "logps/chosen": -1302.87255859375, | |
| "logps/rejected": -1420.4437255859375, | |
| "loss": 0.6917054057121277, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.04229995980858803, | |
| "rewards/margins": 0.0015316582284867764, | |
| "rewards/rejected": 0.04076829552650452, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.2802197802197802, | |
| "grad_norm": 0.7443718910217285, | |
| "learning_rate": 2.0245398773006137e-06, | |
| "logits/chosen": -0.49943816661834717, | |
| "logits/rejected": -0.5221395492553711, | |
| "logps/chosen": -1615.616943359375, | |
| "logps/rejected": -1565.77783203125, | |
| "loss": 0.6914383172988892, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.04047242924571037, | |
| "rewards/margins": 0.011208095587790012, | |
| "rewards/rejected": 0.029264334589242935, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 1.0812468528747559, | |
| "learning_rate": 2.0092024539877303e-06, | |
| "logits/chosen": -0.5125001072883606, | |
| "logits/rejected": -0.5849840641021729, | |
| "logps/chosen": -1389.961181640625, | |
| "logps/rejected": -1265.2818603515625, | |
| "loss": 0.6922411322593689, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.008559723384678364, | |
| "rewards/margins": 0.001604671822860837, | |
| "rewards/rejected": 0.006955050863325596, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.29120879120879123, | |
| "grad_norm": 1.38298761844635, | |
| "learning_rate": 1.9938650306748465e-06, | |
| "logits/chosen": -0.8314374685287476, | |
| "logits/rejected": -0.8688368797302246, | |
| "logps/chosen": -1503.9605712890625, | |
| "logps/rejected": -1391.91357421875, | |
| "loss": 0.6904184818267822, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.0023605534806847572, | |
| "rewards/margins": 0.010243010707199574, | |
| "rewards/rejected": -0.01260356418788433, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.2967032967032967, | |
| "grad_norm": 0.7907947301864624, | |
| "learning_rate": 1.9785276073619635e-06, | |
| "logits/chosen": -0.4663585126399994, | |
| "logits/rejected": -0.5531838536262512, | |
| "logps/chosen": -1565.290771484375, | |
| "logps/rejected": -1368.54833984375, | |
| "loss": 0.6908788681030273, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.04032035544514656, | |
| "rewards/margins": 0.011873043142259121, | |
| "rewards/rejected": 0.028447313234210014, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.3021978021978022, | |
| "grad_norm": 1.203404426574707, | |
| "learning_rate": 1.96319018404908e-06, | |
| "logits/chosen": -0.38841575384140015, | |
| "logits/rejected": -0.43689727783203125, | |
| "logps/chosen": -1580.765625, | |
| "logps/rejected": -1455.60498046875, | |
| "loss": 0.6918849945068359, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.011032314039766788, | |
| "rewards/margins": 0.009747497737407684, | |
| "rewards/rejected": 0.0012848172336816788, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 0.7074547410011292, | |
| "learning_rate": 1.9478527607361967e-06, | |
| "logits/chosen": -0.3431719243526459, | |
| "logits/rejected": -0.2189161628484726, | |
| "logps/chosen": -1363.068603515625, | |
| "logps/rejected": -1273.5313720703125, | |
| "loss": 0.6918089389801025, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.029067430645227432, | |
| "rewards/margins": 0.013189269229769707, | |
| "rewards/rejected": 0.015878159552812576, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.3131868131868132, | |
| "grad_norm": 0.8552976250648499, | |
| "learning_rate": 1.932515337423313e-06, | |
| "logits/chosen": -0.3242160379886627, | |
| "logits/rejected": -0.36963948607444763, | |
| "logps/chosen": -1408.054443359375, | |
| "logps/rejected": -1515.0474853515625, | |
| "loss": 0.691418468952179, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.02214316464960575, | |
| "rewards/margins": 0.008005449548363686, | |
| "rewards/rejected": 0.01413771416991949, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.31868131868131866, | |
| "grad_norm": 0.9317906498908997, | |
| "learning_rate": 1.9171779141104296e-06, | |
| "logits/chosen": -0.651016354560852, | |
| "logits/rejected": -0.646873414516449, | |
| "logps/chosen": -1354.4619140625, | |
| "logps/rejected": -1355.021728515625, | |
| "loss": 0.6914571523666382, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.009268360212445259, | |
| "rewards/margins": 0.00419324915856123, | |
| "rewards/rejected": 0.005075111519545317, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.3241758241758242, | |
| "grad_norm": 0.6546081304550171, | |
| "learning_rate": 1.9018404907975464e-06, | |
| "logits/chosen": -0.4001083970069885, | |
| "logits/rejected": -0.34092509746551514, | |
| "logps/chosen": -1422.7691650390625, | |
| "logps/rejected": -1336.7745361328125, | |
| "loss": 0.6927464008331299, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.04447000473737717, | |
| "rewards/margins": 0.011220641434192657, | |
| "rewards/rejected": 0.03324935957789421, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.32967032967032966, | |
| "grad_norm": 1.2057020664215088, | |
| "learning_rate": 1.8865030674846626e-06, | |
| "logits/chosen": -0.40792492032051086, | |
| "logits/rejected": -0.41026824712753296, | |
| "logps/chosen": -1616.24462890625, | |
| "logps/rejected": -1496.211669921875, | |
| "loss": 0.6902381181716919, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.019096756353974342, | |
| "rewards/margins": 0.012553432956337929, | |
| "rewards/rejected": 0.00654332060366869, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.33516483516483514, | |
| "grad_norm": 0.8189549446105957, | |
| "learning_rate": 1.8711656441717794e-06, | |
| "logits/chosen": -0.40066254138946533, | |
| "logits/rejected": -0.5438181757926941, | |
| "logps/chosen": -1495.165771484375, | |
| "logps/rejected": -1468.3994140625, | |
| "loss": 0.6911718845367432, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.026379263028502464, | |
| "rewards/margins": 0.010899864137172699, | |
| "rewards/rejected": 0.01547939982265234, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.34065934065934067, | |
| "grad_norm": 0.7142252922058105, | |
| "learning_rate": 1.855828220858896e-06, | |
| "logits/chosen": -0.4213395416736603, | |
| "logits/rejected": -0.4220888316631317, | |
| "logps/chosen": -1588.5791015625, | |
| "logps/rejected": -1450.305908203125, | |
| "loss": 0.6918849945068359, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 0.044086188077926636, | |
| "rewards/margins": 0.021000908687710762, | |
| "rewards/rejected": 0.023085277527570724, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.34615384615384615, | |
| "grad_norm": 0.931616485118866, | |
| "learning_rate": 1.8404907975460124e-06, | |
| "logits/chosen": -0.4536420404911041, | |
| "logits/rejected": -0.553905189037323, | |
| "logps/chosen": -1479.9383544921875, | |
| "logps/rejected": -1247.5972900390625, | |
| "loss": 0.6907938122749329, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.0278279110789299, | |
| "rewards/margins": 0.017392883077263832, | |
| "rewards/rejected": 0.010435027070343494, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.3516483516483517, | |
| "grad_norm": 1.1059670448303223, | |
| "learning_rate": 1.825153374233129e-06, | |
| "logits/chosen": -0.1391632854938507, | |
| "logits/rejected": -0.29146745800971985, | |
| "logps/chosen": -1544.81103515625, | |
| "logps/rejected": -1467.8116455078125, | |
| "loss": 0.6943617463111877, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": 0.004199332557618618, | |
| "rewards/margins": -0.0030631115660071373, | |
| "rewards/rejected": 0.007262444589287043, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.35714285714285715, | |
| "grad_norm": 2.532100200653076, | |
| "learning_rate": 1.8098159509202457e-06, | |
| "logits/chosen": -0.4176465570926666, | |
| "logits/rejected": -0.47569915652275085, | |
| "logps/chosen": -1727.229736328125, | |
| "logps/rejected": -1618.333984375, | |
| "loss": 0.6903217434883118, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.008169951848685741, | |
| "rewards/margins": 0.0118106072768569, | |
| "rewards/rejected": -0.01998055912554264, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.3626373626373626, | |
| "grad_norm": 0.8319610953330994, | |
| "learning_rate": 1.794478527607362e-06, | |
| "logits/chosen": -0.5535542964935303, | |
| "logits/rejected": -0.45863211154937744, | |
| "logps/chosen": -1296.4996337890625, | |
| "logps/rejected": -1305.611083984375, | |
| "loss": 0.6914780139923096, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.003835830604657531, | |
| "rewards/margins": 0.00748834665864706, | |
| "rewards/rejected": -0.0036525153554975986, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.36813186813186816, | |
| "grad_norm": 1.074711799621582, | |
| "learning_rate": 1.7791411042944787e-06, | |
| "logits/chosen": -0.6282920241355896, | |
| "logits/rejected": -0.6052903532981873, | |
| "logps/chosen": -1367.955078125, | |
| "logps/rejected": -1418.4644775390625, | |
| "loss": 0.691602349281311, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.008508214727044106, | |
| "rewards/margins": 0.0025658695958554745, | |
| "rewards/rejected": -0.011074084788560867, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.37362637362637363, | |
| "grad_norm": 1.048543930053711, | |
| "learning_rate": 1.7638036809815953e-06, | |
| "logits/chosen": -0.41451916098594666, | |
| "logits/rejected": -0.44973787665367126, | |
| "logps/chosen": -1594.5008544921875, | |
| "logps/rejected": -1379.6549072265625, | |
| "loss": 0.6915735602378845, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.013657140545547009, | |
| "rewards/margins": 0.010285510681569576, | |
| "rewards/rejected": 0.003371629398316145, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.3791208791208791, | |
| "grad_norm": 1.4657976627349854, | |
| "learning_rate": 1.7484662576687117e-06, | |
| "logits/chosen": -0.42228737473487854, | |
| "logits/rejected": -0.5371153354644775, | |
| "logps/chosen": -1440.2550048828125, | |
| "logps/rejected": -1317.087890625, | |
| "loss": 0.6919211745262146, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.004323349334299564, | |
| "rewards/margins": 0.007630740292370319, | |
| "rewards/rejected": -0.0033073904924094677, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.7367867827415466, | |
| "learning_rate": 1.7331288343558283e-06, | |
| "logits/chosen": -0.741744875907898, | |
| "logits/rejected": -0.6823490858078003, | |
| "logps/chosen": -1317.0145263671875, | |
| "logps/rejected": -1323.838623046875, | |
| "loss": 0.6901740431785583, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.011306552216410637, | |
| "rewards/margins": 0.013367671519517899, | |
| "rewards/rejected": -0.0020611193031072617, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.3901098901098901, | |
| "grad_norm": 1.0581424236297607, | |
| "learning_rate": 1.717791411042945e-06, | |
| "logits/chosen": -0.36263424158096313, | |
| "logits/rejected": -0.37243887782096863, | |
| "logps/chosen": -1136.444580078125, | |
| "logps/rejected": -1088.967529296875, | |
| "loss": 0.6923285722732544, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.009710215963423252, | |
| "rewards/margins": 0.0008770937565714121, | |
| "rewards/rejected": 0.008833122439682484, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.3956043956043956, | |
| "grad_norm": 0.9798324704170227, | |
| "learning_rate": 1.7024539877300614e-06, | |
| "logits/chosen": -0.3991851508617401, | |
| "logits/rejected": -0.3783729374408722, | |
| "logps/chosen": -1416.0003662109375, | |
| "logps/rejected": -1323.023681640625, | |
| "loss": 0.6924745440483093, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.013836746104061604, | |
| "rewards/margins": 0.005006732419133186, | |
| "rewards/rejected": 0.008830012753605843, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.4010989010989011, | |
| "grad_norm": 0.8338879346847534, | |
| "learning_rate": 1.687116564417178e-06, | |
| "logits/chosen": -0.19355978071689606, | |
| "logits/rejected": -0.2498997300863266, | |
| "logps/chosen": -1318.318359375, | |
| "logps/rejected": -1202.7623291015625, | |
| "loss": 0.6920049786567688, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.010977883823215961, | |
| "rewards/margins": 0.0031725564040243626, | |
| "rewards/rejected": 0.0078053283505141735, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.4065934065934066, | |
| "grad_norm": 0.8426941633224487, | |
| "learning_rate": 1.6717791411042946e-06, | |
| "logits/chosen": -0.46479836106300354, | |
| "logits/rejected": -0.42796429991722107, | |
| "logps/chosen": -951.9476928710938, | |
| "logps/rejected": -1005.59521484375, | |
| "loss": 0.6904844045639038, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.011304330080747604, | |
| "rewards/margins": 0.0059456657618284225, | |
| "rewards/rejected": 0.005358664318919182, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.41208791208791207, | |
| "grad_norm": 1.5550216436386108, | |
| "learning_rate": 1.656441717791411e-06, | |
| "logits/chosen": -0.6027710437774658, | |
| "logits/rejected": -0.632501482963562, | |
| "logps/chosen": -1343.49755859375, | |
| "logps/rejected": -1283.693115234375, | |
| "loss": 0.6909563541412354, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.006782836746424437, | |
| "rewards/margins": 0.009988861158490181, | |
| "rewards/rejected": -0.003206023946404457, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.4175824175824176, | |
| "grad_norm": 0.9994601607322693, | |
| "learning_rate": 1.6411042944785276e-06, | |
| "logits/chosen": -0.5772875547409058, | |
| "logits/rejected": -0.5767691731452942, | |
| "logps/chosen": -1284.95263671875, | |
| "logps/rejected": -1317.3885498046875, | |
| "loss": 0.6914191842079163, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.002038019709289074, | |
| "rewards/margins": 0.01685798540711403, | |
| "rewards/rejected": -0.01481996476650238, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.4230769230769231, | |
| "grad_norm": 1.4171066284179688, | |
| "learning_rate": 1.6257668711656445e-06, | |
| "logits/chosen": -0.4665534794330597, | |
| "logits/rejected": -0.432694673538208, | |
| "logps/chosen": -1695.040283203125, | |
| "logps/rejected": -1551.968505859375, | |
| "loss": 0.6929216384887695, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.02615145593881607, | |
| "rewards/margins": -0.0027590510435402393, | |
| "rewards/rejected": -0.02339240349829197, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.42857142857142855, | |
| "grad_norm": 1.1727432012557983, | |
| "learning_rate": 1.6104294478527606e-06, | |
| "logits/chosen": -0.5112898349761963, | |
| "logits/rejected": -0.5043646097183228, | |
| "logps/chosen": -1548.087646484375, | |
| "logps/rejected": -1400.1192626953125, | |
| "loss": 0.6917024254798889, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.05165726691484451, | |
| "rewards/margins": -0.012301253154873848, | |
| "rewards/rejected": -0.039356015622615814, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.4340659340659341, | |
| "grad_norm": 0.4238358736038208, | |
| "learning_rate": 1.5950920245398775e-06, | |
| "logits/chosen": -0.30220383405685425, | |
| "logits/rejected": -0.300650417804718, | |
| "logps/chosen": -1494.92333984375, | |
| "logps/rejected": -1436.374267578125, | |
| "loss": 0.6916438341140747, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.05291643738746643, | |
| "rewards/margins": -0.008324610069394112, | |
| "rewards/rejected": -0.04459183290600777, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.43956043956043955, | |
| "grad_norm": 0.9021973013877869, | |
| "learning_rate": 1.579754601226994e-06, | |
| "logits/chosen": -0.10122132301330566, | |
| "logits/rejected": -0.2252417355775833, | |
| "logps/chosen": -1179.5601806640625, | |
| "logps/rejected": -1048.575439453125, | |
| "loss": 0.6910136342048645, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.021670779213309288, | |
| "rewards/margins": 0.0014739391626790166, | |
| "rewards/rejected": -0.023144718259572983, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.44505494505494503, | |
| "grad_norm": 0.9659970998764038, | |
| "learning_rate": 1.5644171779141107e-06, | |
| "logits/chosen": -0.5470824837684631, | |
| "logits/rejected": -0.48158833384513855, | |
| "logps/chosen": -1314.2874755859375, | |
| "logps/rejected": -1259.9593505859375, | |
| "loss": 0.6916049718856812, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.01713727042078972, | |
| "rewards/margins": 0.005170955788344145, | |
| "rewards/rejected": -0.02230822481215, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.45054945054945056, | |
| "grad_norm": 1.1842788457870483, | |
| "learning_rate": 1.5490797546012271e-06, | |
| "logits/chosen": -0.33465588092803955, | |
| "logits/rejected": -0.33280038833618164, | |
| "logps/chosen": -1350.5224609375, | |
| "logps/rejected": -1351.6611328125, | |
| "loss": 0.6924982666969299, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.011164035648107529, | |
| "rewards/margins": 0.004191071260720491, | |
| "rewards/rejected": -0.015355108305811882, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.45604395604395603, | |
| "grad_norm": 1.148301124572754, | |
| "learning_rate": 1.5337423312883437e-06, | |
| "logits/chosen": -0.4244360625743866, | |
| "logits/rejected": -0.49099138379096985, | |
| "logps/chosen": -1331.2615966796875, | |
| "logps/rejected": -1181.3885498046875, | |
| "loss": 0.6928058862686157, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.007538508623838425, | |
| "rewards/margins": 0.0015701393131166697, | |
| "rewards/rejected": -0.00910864770412445, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 1.7485556602478027, | |
| "learning_rate": 1.5184049079754604e-06, | |
| "logits/chosen": -0.42688897252082825, | |
| "logits/rejected": -0.5053452253341675, | |
| "logps/chosen": -1439.1773681640625, | |
| "logps/rejected": -1262.5604248046875, | |
| "loss": 0.6921294331550598, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.0023087309673428535, | |
| "rewards/margins": 0.0033039902336895466, | |
| "rewards/rejected": -0.0056127216666936874, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.46703296703296704, | |
| "grad_norm": 1.6599191427230835, | |
| "learning_rate": 1.5030674846625768e-06, | |
| "logits/chosen": -0.43120938539505005, | |
| "logits/rejected": -0.6010267734527588, | |
| "logps/chosen": -1558.959228515625, | |
| "logps/rejected": -1472.342529296875, | |
| "loss": 0.6921494007110596, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.022558651864528656, | |
| "rewards/margins": 0.005641758441925049, | |
| "rewards/rejected": 0.016916893422603607, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.4725274725274725, | |
| "grad_norm": 1.0252467393875122, | |
| "learning_rate": 1.4877300613496934e-06, | |
| "logits/chosen": -0.37089625000953674, | |
| "logits/rejected": -0.35030075907707214, | |
| "logps/chosen": -1299.443115234375, | |
| "logps/rejected": -1176.6553955078125, | |
| "loss": 0.6907151341438293, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.024281539022922516, | |
| "rewards/margins": 0.009798645041882992, | |
| "rewards/rejected": 0.014482893981039524, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.47802197802197804, | |
| "grad_norm": 1.026886224746704, | |
| "learning_rate": 1.47239263803681e-06, | |
| "logits/chosen": -0.601810872554779, | |
| "logits/rejected": -0.5252466201782227, | |
| "logps/chosen": -1495.2203369140625, | |
| "logps/rejected": -1310.950927734375, | |
| "loss": 0.6917718648910522, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.0073541309684515, | |
| "rewards/margins": 0.007022920064628124, | |
| "rewards/rejected": 0.00033121160231530666, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.4835164835164835, | |
| "grad_norm": 0.6109346151351929, | |
| "learning_rate": 1.4570552147239264e-06, | |
| "logits/chosen": -0.3959222435951233, | |
| "logits/rejected": -0.2995205819606781, | |
| "logps/chosen": -1450.761474609375, | |
| "logps/rejected": -1450.51953125, | |
| "loss": 0.6905621886253357, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.0416596420109272, | |
| "rewards/margins": 0.013335322961211205, | |
| "rewards/rejected": 0.028324317187070847, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.489010989010989, | |
| "grad_norm": 0.8334896564483643, | |
| "learning_rate": 1.441717791411043e-06, | |
| "logits/chosen": -0.3118741810321808, | |
| "logits/rejected": -0.4128867983818054, | |
| "logps/chosen": -1281.40478515625, | |
| "logps/rejected": -1296.41357421875, | |
| "loss": 0.6907057762145996, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.014409005641937256, | |
| "rewards/margins": 0.014574131928384304, | |
| "rewards/rejected": -0.0001651242928346619, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.4945054945054945, | |
| "grad_norm": 0.8609052300453186, | |
| "learning_rate": 1.4263803680981596e-06, | |
| "logits/chosen": -0.2428097128868103, | |
| "logits/rejected": -0.23511645197868347, | |
| "logps/chosen": -1411.8843994140625, | |
| "logps/rejected": -1272.73876953125, | |
| "loss": 0.6919518709182739, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.026493338868021965, | |
| "rewards/margins": 0.014896931126713753, | |
| "rewards/rejected": 0.011596407741308212, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.7259723544120789, | |
| "learning_rate": 1.411042944785276e-06, | |
| "logits/chosen": -0.11918177455663681, | |
| "logits/rejected": -0.07845693826675415, | |
| "logps/chosen": -1513.873779296875, | |
| "logps/rejected": -1462.4080810546875, | |
| "loss": 0.6918542981147766, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.0520310252904892, | |
| "rewards/margins": 0.011454623192548752, | |
| "rewards/rejected": 0.040576402097940445, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.5054945054945055, | |
| "grad_norm": 1.1002850532531738, | |
| "learning_rate": 1.3957055214723927e-06, | |
| "logits/chosen": -0.2559316158294678, | |
| "logits/rejected": -0.200018048286438, | |
| "logps/chosen": -1493.306396484375, | |
| "logps/rejected": -1425.4647216796875, | |
| "loss": 0.6930555105209351, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.02612033672630787, | |
| "rewards/margins": -0.002100848825648427, | |
| "rewards/rejected": 0.028221188113093376, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.510989010989011, | |
| "grad_norm": 0.7148520946502686, | |
| "learning_rate": 1.3803680981595095e-06, | |
| "logits/chosen": -0.367002010345459, | |
| "logits/rejected": -0.4066450893878937, | |
| "logps/chosen": -1357.5050048828125, | |
| "logps/rejected": -1381.8837890625, | |
| "loss": 0.6922594308853149, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.04670805484056473, | |
| "rewards/margins": 0.009145058691501617, | |
| "rewards/rejected": 0.03756299987435341, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.5164835164835165, | |
| "grad_norm": 1.1037565469741821, | |
| "learning_rate": 1.3650306748466257e-06, | |
| "logits/chosen": -0.5499804615974426, | |
| "logits/rejected": -0.45308274030685425, | |
| "logps/chosen": -1548.565185546875, | |
| "logps/rejected": -1470.4267578125, | |
| "loss": 0.6900733709335327, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.038844555616378784, | |
| "rewards/margins": 0.01671256124973297, | |
| "rewards/rejected": 0.022131996229290962, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.521978021978022, | |
| "grad_norm": 0.8843666911125183, | |
| "learning_rate": 1.3496932515337425e-06, | |
| "logits/chosen": 0.04877326637506485, | |
| "logits/rejected": -0.005727946758270264, | |
| "logps/chosen": -1560.8104248046875, | |
| "logps/rejected": -1314.3270263671875, | |
| "loss": 0.6904133558273315, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.030398786067962646, | |
| "rewards/margins": 0.02400778792798519, | |
| "rewards/rejected": 0.006391000468283892, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.5274725274725275, | |
| "grad_norm": 1.0202571153640747, | |
| "learning_rate": 1.3343558282208591e-06, | |
| "logits/chosen": -0.12934473156929016, | |
| "logits/rejected": -0.175532728433609, | |
| "logps/chosen": -1101.5224609375, | |
| "logps/rejected": -1186.7508544921875, | |
| "loss": 0.6912416219711304, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.01369690801948309, | |
| "rewards/margins": 0.005995034705847502, | |
| "rewards/rejected": 0.007701873779296875, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.532967032967033, | |
| "grad_norm": 1.3046600818634033, | |
| "learning_rate": 1.3190184049079755e-06, | |
| "logits/chosen": -0.3259139955043793, | |
| "logits/rejected": -0.36783546209335327, | |
| "logps/chosen": -1740.535888671875, | |
| "logps/rejected": -1574.8447265625, | |
| "loss": 0.6932768821716309, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.02200869657099247, | |
| "rewards/margins": 0.005453357473015785, | |
| "rewards/rejected": 0.016555337235331535, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.5384615384615384, | |
| "grad_norm": 1.0123155117034912, | |
| "learning_rate": 1.3036809815950922e-06, | |
| "logits/chosen": -0.13210992515087128, | |
| "logits/rejected": -0.04369845241308212, | |
| "logps/chosen": -1536.9061279296875, | |
| "logps/rejected": -1519.1163330078125, | |
| "loss": 0.6910783648490906, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.01258872076869011, | |
| "rewards/margins": 0.010054588317871094, | |
| "rewards/rejected": 0.002534131519496441, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.5439560439560439, | |
| "grad_norm": 1.3854718208312988, | |
| "learning_rate": 1.2883435582822088e-06, | |
| "logits/chosen": -0.18825362622737885, | |
| "logits/rejected": -0.24444834887981415, | |
| "logps/chosen": -1398.3173828125, | |
| "logps/rejected": -1364.36865234375, | |
| "loss": 0.6922256946563721, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.01920771598815918, | |
| "rewards/margins": 0.014828749001026154, | |
| "rewards/rejected": 0.004378966521471739, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.5494505494505495, | |
| "grad_norm": 1.2779775857925415, | |
| "learning_rate": 1.2730061349693252e-06, | |
| "logits/chosen": -0.4430152475833893, | |
| "logits/rejected": -0.35647666454315186, | |
| "logps/chosen": -1487.2095947265625, | |
| "logps/rejected": -1470.6143798828125, | |
| "loss": 0.6919921040534973, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.009396498091518879, | |
| "rewards/margins": 0.005829436704516411, | |
| "rewards/rejected": -0.01522593479603529, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.554945054945055, | |
| "grad_norm": 0.6213805675506592, | |
| "learning_rate": 1.2576687116564418e-06, | |
| "logits/chosen": -0.22846508026123047, | |
| "logits/rejected": -0.21416452527046204, | |
| "logps/chosen": -1408.79833984375, | |
| "logps/rejected": -1421.5936279296875, | |
| "loss": 0.6927986145019531, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.04689623415470123, | |
| "rewards/margins": -0.009503163397312164, | |
| "rewards/rejected": -0.03739307448267937, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.5604395604395604, | |
| "grad_norm": 0.9802994132041931, | |
| "learning_rate": 1.2423312883435584e-06, | |
| "logits/chosen": -0.505018949508667, | |
| "logits/rejected": -0.5704793334007263, | |
| "logps/chosen": -1257.908447265625, | |
| "logps/rejected": -1249.787353515625, | |
| "loss": 0.6931822896003723, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.025434236973524094, | |
| "rewards/margins": -0.002385740401223302, | |
| "rewards/rejected": -0.02304849587380886, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.5659340659340659, | |
| "grad_norm": 0.8993610739707947, | |
| "learning_rate": 1.226993865030675e-06, | |
| "logits/chosen": -0.41527795791625977, | |
| "logits/rejected": -0.29129812121391296, | |
| "logps/chosen": -1268.831787109375, | |
| "logps/rejected": -1143.24609375, | |
| "loss": 0.69354248046875, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.008909551426768303, | |
| "rewards/margins": 0.0018367673037573695, | |
| "rewards/rejected": -0.010746316984295845, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 1.2592899799346924, | |
| "learning_rate": 1.2116564417177914e-06, | |
| "logits/chosen": -0.34576109051704407, | |
| "logits/rejected": -0.3690977990627289, | |
| "logps/chosen": -1669.214599609375, | |
| "logps/rejected": -1526.4136962890625, | |
| "loss": 0.6926941275596619, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.0013259842526167631, | |
| "rewards/margins": 0.004025318659842014, | |
| "rewards/rejected": -0.005351303145289421, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.5769230769230769, | |
| "grad_norm": 1.046607494354248, | |
| "learning_rate": 1.196319018404908e-06, | |
| "logits/chosen": -0.15831628441810608, | |
| "logits/rejected": -0.37302541732788086, | |
| "logps/chosen": -1464.148193359375, | |
| "logps/rejected": -1303.965576171875, | |
| "loss": 0.6931281685829163, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.02865537628531456, | |
| "rewards/margins": -0.001078385510481894, | |
| "rewards/rejected": -0.027576986700296402, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.5824175824175825, | |
| "grad_norm": 0.426277756690979, | |
| "learning_rate": 1.1809815950920247e-06, | |
| "logits/chosen": -0.5624100565910339, | |
| "logits/rejected": -0.5374657511711121, | |
| "logps/chosen": -1273.535888671875, | |
| "logps/rejected": -1335.310791015625, | |
| "loss": 0.6924761533737183, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.0322088897228241, | |
| "rewards/margins": -0.0029357431922107935, | |
| "rewards/rejected": -0.029273148626089096, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.5879120879120879, | |
| "grad_norm": 0.6133700013160706, | |
| "learning_rate": 1.165644171779141e-06, | |
| "logits/chosen": -0.20700356364250183, | |
| "logits/rejected": -0.22031204402446747, | |
| "logps/chosen": -1394.43017578125, | |
| "logps/rejected": -1308.563720703125, | |
| "loss": 0.6926645636558533, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.04462941363453865, | |
| "rewards/margins": -0.008419161662459373, | |
| "rewards/rejected": -0.03621025010943413, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.5934065934065934, | |
| "grad_norm": 0.6847041845321655, | |
| "learning_rate": 1.1503067484662577e-06, | |
| "logits/chosen": -0.14995786547660828, | |
| "logits/rejected": -0.14485234022140503, | |
| "logps/chosen": -1369.47998046875, | |
| "logps/rejected": -1361.496337890625, | |
| "loss": 0.6923136115074158, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.03646060824394226, | |
| "rewards/margins": 0.0012728970032185316, | |
| "rewards/rejected": -0.03773351013660431, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.5989010989010989, | |
| "grad_norm": 0.6556602120399475, | |
| "learning_rate": 1.1349693251533743e-06, | |
| "logits/chosen": -0.17063872516155243, | |
| "logits/rejected": -0.25048452615737915, | |
| "logps/chosen": -1319.7943115234375, | |
| "logps/rejected": -1256.775634765625, | |
| "loss": 0.6931478977203369, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.025922460481524467, | |
| "rewards/margins": -0.00031948136165738106, | |
| "rewards/rejected": -0.025602979585528374, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.6043956043956044, | |
| "grad_norm": 0.8980839848518372, | |
| "learning_rate": 1.119631901840491e-06, | |
| "logits/chosen": -0.3595108091831207, | |
| "logits/rejected": -0.34528863430023193, | |
| "logps/chosen": -1376.7236328125, | |
| "logps/rejected": -1430.8155517578125, | |
| "loss": 0.693233847618103, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.022598477080464363, | |
| "rewards/margins": 0.000155882618855685, | |
| "rewards/rejected": -0.022754356265068054, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.6098901098901099, | |
| "grad_norm": 1.0571579933166504, | |
| "learning_rate": 1.1042944785276075e-06, | |
| "logits/chosen": -0.37562742829322815, | |
| "logits/rejected": -0.4181535243988037, | |
| "logps/chosen": -1414.00732421875, | |
| "logps/rejected": -1326.544677734375, | |
| "loss": 0.6914387345314026, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.012199820019304752, | |
| "rewards/margins": 0.0014734650030732155, | |
| "rewards/rejected": -0.013673287816345692, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 1.7735028266906738, | |
| "learning_rate": 1.088957055214724e-06, | |
| "logits/chosen": -0.4293566346168518, | |
| "logits/rejected": -0.4803142547607422, | |
| "logps/chosen": -1466.178955078125, | |
| "logps/rejected": -1471.9337158203125, | |
| "loss": 0.6919817924499512, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.010825528763234615, | |
| "rewards/margins": 0.010156643576920033, | |
| "rewards/rejected": -0.020982172340154648, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.6208791208791209, | |
| "grad_norm": 1.0000537633895874, | |
| "learning_rate": 1.0736196319018406e-06, | |
| "logits/chosen": -0.5714986324310303, | |
| "logits/rejected": -0.5201742053031921, | |
| "logps/chosen": -1415.6246337890625, | |
| "logps/rejected": -1385.709716796875, | |
| "loss": 0.6931148767471313, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.03592289984226227, | |
| "rewards/margins": 0.010911607183516026, | |
| "rewards/rejected": 0.02501128986477852, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.6263736263736264, | |
| "grad_norm": 0.8961329460144043, | |
| "learning_rate": 1.0582822085889572e-06, | |
| "logits/chosen": -0.45078492164611816, | |
| "logits/rejected": -0.4087766408920288, | |
| "logps/chosen": -1351.2510986328125, | |
| "logps/rejected": -1375.3194580078125, | |
| "loss": 0.6920664310455322, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.029718702659010887, | |
| "rewards/margins": 0.006335887126624584, | |
| "rewards/rejected": 0.02338281460106373, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.6318681318681318, | |
| "grad_norm": 0.9695339202880859, | |
| "learning_rate": 1.0429447852760736e-06, | |
| "logits/chosen": -0.5634657144546509, | |
| "logits/rejected": -0.6761491894721985, | |
| "logps/chosen": -1333.12109375, | |
| "logps/rejected": -1317.586669921875, | |
| "loss": 0.6914012432098389, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.01340350229293108, | |
| "rewards/margins": 0.008562865667045116, | |
| "rewards/rejected": 0.0048406366258859634, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.6373626373626373, | |
| "grad_norm": 0.8852068185806274, | |
| "learning_rate": 1.0276073619631902e-06, | |
| "logits/chosen": -0.2876928150653839, | |
| "logits/rejected": -0.397905558347702, | |
| "logps/chosen": -1615.9324951171875, | |
| "logps/rejected": -1533.5203857421875, | |
| "loss": 0.6928586959838867, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.00988774187862873, | |
| "rewards/margins": 0.0017955403309315443, | |
| "rewards/rejected": -0.011683283373713493, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.6428571428571429, | |
| "grad_norm": 0.9956200122833252, | |
| "learning_rate": 1.0122699386503068e-06, | |
| "logits/chosen": -0.7941329479217529, | |
| "logits/rejected": -0.8391545414924622, | |
| "logps/chosen": -1485.06884765625, | |
| "logps/rejected": -1504.8533935546875, | |
| "loss": 0.6929861307144165, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.005250845104455948, | |
| "rewards/margins": -0.0009261607192456722, | |
| "rewards/rejected": -0.004324684850871563, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.6483516483516484, | |
| "grad_norm": 0.8900634050369263, | |
| "learning_rate": 9.969325153374232e-07, | |
| "logits/chosen": -0.4403144121170044, | |
| "logits/rejected": -0.3117816746234894, | |
| "logps/chosen": -1124.2105712890625, | |
| "logps/rejected": -1162.626220703125, | |
| "loss": 0.6915737390518188, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.018135830760002136, | |
| "rewards/margins": 0.004251901991665363, | |
| "rewards/rejected": -0.022387731820344925, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.6538461538461539, | |
| "grad_norm": 1.519210934638977, | |
| "learning_rate": 9.8159509202454e-07, | |
| "logits/chosen": -0.3049584925174713, | |
| "logits/rejected": -0.348927766084671, | |
| "logps/chosen": -1318.882568359375, | |
| "logps/rejected": -1179.972900390625, | |
| "loss": 0.6907892227172852, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.009297370910644531, | |
| "rewards/margins": 0.0032487385906279087, | |
| "rewards/rejected": -0.012546109966933727, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.6593406593406593, | |
| "grad_norm": 1.4839621782302856, | |
| "learning_rate": 9.662576687116565e-07, | |
| "logits/chosen": -0.7371286153793335, | |
| "logits/rejected": -0.813791036605835, | |
| "logps/chosen": -1551.0048828125, | |
| "logps/rejected": -1457.481201171875, | |
| "loss": 0.692111074924469, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.006462344899773598, | |
| "rewards/margins": 0.0030792569741606712, | |
| "rewards/rejected": -0.009541602805256844, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.6648351648351648, | |
| "grad_norm": 1.420235514640808, | |
| "learning_rate": 9.509202453987732e-07, | |
| "logits/chosen": -0.3957596719264984, | |
| "logits/rejected": -0.44786208868026733, | |
| "logps/chosen": -1695.80517578125, | |
| "logps/rejected": -1655.417236328125, | |
| "loss": 0.6925563812255859, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.018932923674583435, | |
| "rewards/margins": 0.0009399798000231385, | |
| "rewards/rejected": -0.01987290196120739, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.6703296703296703, | |
| "grad_norm": 1.4338594675064087, | |
| "learning_rate": 9.355828220858897e-07, | |
| "logits/chosen": -0.6365727186203003, | |
| "logits/rejected": -0.6079616546630859, | |
| "logps/chosen": -1449.3353271484375, | |
| "logps/rejected": -1306.079345703125, | |
| "loss": 0.6926460862159729, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.01147129014134407, | |
| "rewards/margins": 0.005524273030459881, | |
| "rewards/rejected": -0.016995562240481377, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.6758241758241759, | |
| "grad_norm": 1.0156636238098145, | |
| "learning_rate": 9.202453987730062e-07, | |
| "logits/chosen": -0.2907084822654724, | |
| "logits/rejected": -0.3623642325401306, | |
| "logps/chosen": -1366.35009765625, | |
| "logps/rejected": -1161.46875, | |
| "loss": 0.6904911398887634, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 0.017240682616829872, | |
| "rewards/margins": 0.014017186127603054, | |
| "rewards/rejected": 0.0032234953250736, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.6813186813186813, | |
| "grad_norm": 1.0583997964859009, | |
| "learning_rate": 9.049079754601228e-07, | |
| "logits/chosen": -0.7642356157302856, | |
| "logits/rejected": -0.7943760752677917, | |
| "logps/chosen": -1484.6651611328125, | |
| "logps/rejected": -1499.005126953125, | |
| "loss": 0.6919360160827637, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.029063954949378967, | |
| "rewards/margins": 0.013380720280110836, | |
| "rewards/rejected": 0.015683233737945557, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.6868131868131868, | |
| "grad_norm": 1.5796548128128052, | |
| "learning_rate": 8.895705521472393e-07, | |
| "logits/chosen": -0.727140486240387, | |
| "logits/rejected": -0.7532109022140503, | |
| "logps/chosen": -1440.1063232421875, | |
| "logps/rejected": -1624.1826171875, | |
| "loss": 0.6913582682609558, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -2.9935792554169893e-05, | |
| "rewards/margins": 0.007877510972321033, | |
| "rewards/rejected": -0.007907447405159473, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.6923076923076923, | |
| "grad_norm": 0.86751389503479, | |
| "learning_rate": 8.742331288343559e-07, | |
| "logits/chosen": -0.389473021030426, | |
| "logits/rejected": -0.41523274779319763, | |
| "logps/chosen": -1585.4542236328125, | |
| "logps/rejected": -1302.420166015625, | |
| "loss": 0.6916378736495972, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.022927483543753624, | |
| "rewards/margins": -0.0011927932500839233, | |
| "rewards/rejected": -0.02173468843102455, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.6978021978021978, | |
| "grad_norm": 0.7973591089248657, | |
| "learning_rate": 8.588957055214725e-07, | |
| "logits/chosen": -0.3697919547557831, | |
| "logits/rejected": -0.32609784603118896, | |
| "logps/chosen": -1386.21240234375, | |
| "logps/rejected": -1433.26171875, | |
| "loss": 0.6924079656600952, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.024928748607635498, | |
| "rewards/margins": 0.0017317293677479029, | |
| "rewards/rejected": -0.026660479605197906, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.7032967032967034, | |
| "grad_norm": 0.8267586827278137, | |
| "learning_rate": 8.43558282208589e-07, | |
| "logits/chosen": -0.39392054080963135, | |
| "logits/rejected": -0.4094931483268738, | |
| "logps/chosen": -1449.90380859375, | |
| "logps/rejected": -1335.430908203125, | |
| "loss": 0.6908578872680664, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.037380997091531754, | |
| "rewards/margins": 0.0011315422598272562, | |
| "rewards/rejected": -0.038512542843818665, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.7087912087912088, | |
| "grad_norm": 1.491594910621643, | |
| "learning_rate": 8.282208588957055e-07, | |
| "logits/chosen": -0.3594258725643158, | |
| "logits/rejected": -0.37132909893989563, | |
| "logps/chosen": -1539.3756103515625, | |
| "logps/rejected": -1565.0859375, | |
| "loss": 0.6924813985824585, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.011002148501574993, | |
| "rewards/margins": 0.0007907864637672901, | |
| "rewards/rejected": -0.011792936362326145, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 0.961751401424408, | |
| "learning_rate": 8.128834355828222e-07, | |
| "logits/chosen": -0.5297249555587769, | |
| "logits/rejected": -0.45530810952186584, | |
| "logps/chosen": -1412.378173828125, | |
| "logps/rejected": -1533.008544921875, | |
| "loss": 0.6914486885070801, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.0015569772804155946, | |
| "rewards/margins": 0.0011243524495512247, | |
| "rewards/rejected": 0.0004326249472796917, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.7197802197802198, | |
| "grad_norm": 1.0202986001968384, | |
| "learning_rate": 7.975460122699387e-07, | |
| "logits/chosen": -0.2956511974334717, | |
| "logits/rejected": -0.3015546500682831, | |
| "logps/chosen": -1375.919921875, | |
| "logps/rejected": -1224.4910888671875, | |
| "loss": 0.6927995681762695, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.004763889592140913, | |
| "rewards/margins": -0.001240363228134811, | |
| "rewards/rejected": -0.003523525781929493, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.7252747252747253, | |
| "grad_norm": 1.1129013299942017, | |
| "learning_rate": 7.822085889570554e-07, | |
| "logits/chosen": -0.33333620429039, | |
| "logits/rejected": -0.3021361827850342, | |
| "logps/chosen": -1697.37109375, | |
| "logps/rejected": -1630.856689453125, | |
| "loss": 0.6946622729301453, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.0037074373103678226, | |
| "rewards/margins": -0.003561133984476328, | |
| "rewards/rejected": -0.00014630379155278206, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.7307692307692307, | |
| "grad_norm": 0.7518520355224609, | |
| "learning_rate": 7.668711656441719e-07, | |
| "logits/chosen": -0.8502654433250427, | |
| "logits/rejected": -0.8670777678489685, | |
| "logps/chosen": -1536.0880126953125, | |
| "logps/rejected": -1519.32666015625, | |
| "loss": 0.6922348737716675, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.001666030497290194, | |
| "rewards/margins": 0.007148093543946743, | |
| "rewards/rejected": -0.005482063163071871, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.7362637362637363, | |
| "grad_norm": 1.1008754968643188, | |
| "learning_rate": 7.515337423312884e-07, | |
| "logits/chosen": -0.4814603924751282, | |
| "logits/rejected": -0.43353089690208435, | |
| "logps/chosen": -1490.4459228515625, | |
| "logps/rejected": -1474.66162109375, | |
| "loss": 0.6931823492050171, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.004679413046687841, | |
| "rewards/margins": 0.005811995826661587, | |
| "rewards/rejected": -0.010491408407688141, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.7417582417582418, | |
| "grad_norm": 1.0830036401748657, | |
| "learning_rate": 7.36196319018405e-07, | |
| "logits/chosen": -0.5737816691398621, | |
| "logits/rejected": -0.5043767690658569, | |
| "logps/chosen": -1319.7476806640625, | |
| "logps/rejected": -1288.7066650390625, | |
| "loss": 0.6926530599594116, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.0063733747228980064, | |
| "rewards/margins": -0.0013508339179679751, | |
| "rewards/rejected": 0.007724207825958729, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.7472527472527473, | |
| "grad_norm": 1.152644157409668, | |
| "learning_rate": 7.208588957055215e-07, | |
| "logits/chosen": -0.4295804798603058, | |
| "logits/rejected": -0.39793089032173157, | |
| "logps/chosen": -1226.8475341796875, | |
| "logps/rejected": -1343.85107421875, | |
| "loss": 0.6914076805114746, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.03230779990553856, | |
| "rewards/margins": 0.011389164254069328, | |
| "rewards/rejected": 0.02091863378882408, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.7527472527472527, | |
| "grad_norm": 0.779612123966217, | |
| "learning_rate": 7.05521472392638e-07, | |
| "logits/chosen": -0.689027726650238, | |
| "logits/rejected": -0.6783583164215088, | |
| "logps/chosen": -1387.3411865234375, | |
| "logps/rejected": -1337.02685546875, | |
| "loss": 0.6930306553840637, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.029504049569368362, | |
| "rewards/margins": 0.007792205549776554, | |
| "rewards/rejected": 0.021711843088269234, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.7582417582417582, | |
| "grad_norm": 0.6696953773498535, | |
| "learning_rate": 6.901840490797547e-07, | |
| "logits/chosen": -0.4422488212585449, | |
| "logits/rejected": -0.40494176745414734, | |
| "logps/chosen": -1538.20556640625, | |
| "logps/rejected": -1443.9151611328125, | |
| "loss": 0.6923375725746155, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.03989330306649208, | |
| "rewards/margins": 0.011228306218981743, | |
| "rewards/rejected": 0.02866499498486519, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.7637362637362637, | |
| "grad_norm": 1.1540745496749878, | |
| "learning_rate": 6.748466257668713e-07, | |
| "logits/chosen": -0.40560609102249146, | |
| "logits/rejected": -0.41341060400009155, | |
| "logps/chosen": -1528.6448974609375, | |
| "logps/rejected": -1515.0938720703125, | |
| "loss": 0.6929686069488525, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.017328977584838867, | |
| "rewards/margins": 0.005272225942462683, | |
| "rewards/rejected": 0.012056750245392323, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 0.9930852651596069, | |
| "learning_rate": 6.595092024539878e-07, | |
| "logits/chosen": -0.07321417331695557, | |
| "logits/rejected": -0.08877704292535782, | |
| "logps/chosen": -1580.5616455078125, | |
| "logps/rejected": -1326.6715087890625, | |
| "loss": 0.6931515336036682, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.02595691569149494, | |
| "rewards/margins": 0.0032394053414463997, | |
| "rewards/rejected": 0.022717511281371117, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.7747252747252747, | |
| "grad_norm": 1.2813407182693481, | |
| "learning_rate": 6.441717791411044e-07, | |
| "logits/chosen": -0.3404533267021179, | |
| "logits/rejected": -0.39101195335388184, | |
| "logps/chosen": -1059.5224609375, | |
| "logps/rejected": -1201.876953125, | |
| "loss": 0.6932750940322876, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.011713255196809769, | |
| "rewards/margins": 0.004767118953168392, | |
| "rewards/rejected": 0.0069461362436413765, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.7802197802197802, | |
| "grad_norm": 0.8233911991119385, | |
| "learning_rate": 6.288343558282209e-07, | |
| "logits/chosen": -0.4058606028556824, | |
| "logits/rejected": -0.2887468934059143, | |
| "logps/chosen": -993.4666137695312, | |
| "logps/rejected": -1229.0377197265625, | |
| "loss": 0.6933331489562988, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.021297745406627655, | |
| "rewards/margins": 0.007736450061202049, | |
| "rewards/rejected": 0.01356129627674818, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.7857142857142857, | |
| "grad_norm": 0.9581381678581238, | |
| "learning_rate": 6.134969325153375e-07, | |
| "logits/chosen": -0.08297372609376907, | |
| "logits/rejected": -0.16077230870723724, | |
| "logps/chosen": -1555.29931640625, | |
| "logps/rejected": -1433.138916015625, | |
| "loss": 0.6934653520584106, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.03898211941123009, | |
| "rewards/margins": 0.010908895172178745, | |
| "rewards/rejected": 0.028073228895664215, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.7912087912087912, | |
| "grad_norm": 1.3117345571517944, | |
| "learning_rate": 5.98159509202454e-07, | |
| "logits/chosen": -0.2987571656703949, | |
| "logits/rejected": -0.33382365107536316, | |
| "logps/chosen": -1226.4559326171875, | |
| "logps/rejected": -1198.4481201171875, | |
| "loss": 0.6925820112228394, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 0.0050286054611206055, | |
| "rewards/margins": 0.005197577178478241, | |
| "rewards/rejected": -0.0001689721830189228, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.7967032967032966, | |
| "grad_norm": 1.4236594438552856, | |
| "learning_rate": 5.828220858895705e-07, | |
| "logits/chosen": -0.49541574716567993, | |
| "logits/rejected": -0.4821420907974243, | |
| "logps/chosen": -1369.834716796875, | |
| "logps/rejected": -1285.265625, | |
| "loss": 0.6933659315109253, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.005504836328327656, | |
| "rewards/margins": 0.00019674794748425484, | |
| "rewards/rejected": -0.005701584275811911, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.8021978021978022, | |
| "grad_norm": 1.0803322792053223, | |
| "learning_rate": 5.674846625766872e-07, | |
| "logits/chosen": -0.41038578748703003, | |
| "logits/rejected": -0.22637057304382324, | |
| "logps/chosen": -1731.6552734375, | |
| "logps/rejected": -1467.64599609375, | |
| "loss": 0.6921501755714417, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.021399000659585, | |
| "rewards/margins": 0.0027879904955625534, | |
| "rewards/rejected": -0.024186991155147552, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.8076923076923077, | |
| "grad_norm": 1.168318748474121, | |
| "learning_rate": 5.521472392638038e-07, | |
| "logits/chosen": -0.034145765006542206, | |
| "logits/rejected": -0.19365057349205017, | |
| "logps/chosen": -1311.8702392578125, | |
| "logps/rejected": -1272.918701171875, | |
| "loss": 0.6925601959228516, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.009253215976059437, | |
| "rewards/margins": 0.007820549421012402, | |
| "rewards/rejected": 0.0014326669042930007, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.8131868131868132, | |
| "grad_norm": 1.2571437358856201, | |
| "learning_rate": 5.368098159509203e-07, | |
| "logits/chosen": -0.6215896010398865, | |
| "logits/rejected": -0.6399307250976562, | |
| "logps/chosen": -1441.660888671875, | |
| "logps/rejected": -1511.095947265625, | |
| "loss": 0.6926032304763794, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.008588919416069984, | |
| "rewards/margins": 0.0011720561888068914, | |
| "rewards/rejected": 0.007416863460093737, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.8186813186813187, | |
| "grad_norm": 1.3910409212112427, | |
| "learning_rate": 5.214723926380368e-07, | |
| "logits/chosen": -0.7271393537521362, | |
| "logits/rejected": -0.761420726776123, | |
| "logps/chosen": -1419.0078125, | |
| "logps/rejected": -1475.87353515625, | |
| "loss": 0.6915566325187683, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.022086886689066887, | |
| "rewards/margins": 0.0090789794921875, | |
| "rewards/rejected": 0.013007907196879387, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.8241758241758241, | |
| "grad_norm": 1.4809271097183228, | |
| "learning_rate": 5.061349693251534e-07, | |
| "logits/chosen": -0.7618609666824341, | |
| "logits/rejected": -0.7325096130371094, | |
| "logps/chosen": -1367.3497314453125, | |
| "logps/rejected": -1415.16943359375, | |
| "loss": 0.6933605074882507, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.014094429090619087, | |
| "rewards/margins": 0.0031739422120153904, | |
| "rewards/rejected": 0.010920487344264984, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.8296703296703297, | |
| "grad_norm": 1.8276501893997192, | |
| "learning_rate": 4.9079754601227e-07, | |
| "logits/chosen": -0.8397077322006226, | |
| "logits/rejected": -0.7813823223114014, | |
| "logps/chosen": -1705.48486328125, | |
| "logps/rejected": -1569.8170166015625, | |
| "loss": 0.6919160485267639, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.012699166312813759, | |
| "rewards/margins": 0.012700291350483894, | |
| "rewards/rejected": -1.1255033314228058e-06, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.8351648351648352, | |
| "grad_norm": 1.1467257738113403, | |
| "learning_rate": 4.754601226993866e-07, | |
| "logits/chosen": -0.6820580959320068, | |
| "logits/rejected": -0.8008350729942322, | |
| "logps/chosen": -1449.619140625, | |
| "logps/rejected": -1359.572021484375, | |
| "loss": 0.6915912628173828, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.024284247308969498, | |
| "rewards/margins": 0.009465246461331844, | |
| "rewards/rejected": 0.014819002710282803, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.8406593406593407, | |
| "grad_norm": 1.1940540075302124, | |
| "learning_rate": 4.601226993865031e-07, | |
| "logits/chosen": -0.4987485110759735, | |
| "logits/rejected": -0.4857948124408722, | |
| "logps/chosen": -1300.3497314453125, | |
| "logps/rejected": -1322.68017578125, | |
| "loss": 0.6913399696350098, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.018459968268871307, | |
| "rewards/margins": 0.00594030786305666, | |
| "rewards/rejected": 0.012519659474492073, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.8461538461538461, | |
| "grad_norm": 1.2268966436386108, | |
| "learning_rate": 4.447852760736197e-07, | |
| "logits/chosen": -0.6148173213005066, | |
| "logits/rejected": -0.5686666965484619, | |
| "logps/chosen": -1196.4783935546875, | |
| "logps/rejected": -1201.1766357421875, | |
| "loss": 0.6927800178527832, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.009037494659423828, | |
| "rewards/margins": 0.0022115707397460938, | |
| "rewards/rejected": -0.011249064467847347, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.8516483516483516, | |
| "grad_norm": 1.4325149059295654, | |
| "learning_rate": 4.2944785276073624e-07, | |
| "logits/chosen": -0.09442466497421265, | |
| "logits/rejected": 0.06447166204452515, | |
| "logps/chosen": -1344.0181884765625, | |
| "logps/rejected": -1390.199462890625, | |
| "loss": 0.6922627091407776, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.014286688528954983, | |
| "rewards/margins": -0.0005080983391962945, | |
| "rewards/rejected": -0.013778590597212315, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 1.6210157871246338, | |
| "learning_rate": 4.1411042944785275e-07, | |
| "logits/chosen": -0.5195028781890869, | |
| "logits/rejected": -0.581871509552002, | |
| "logps/chosen": -1449.6171875, | |
| "logps/rejected": -1445.11962890625, | |
| "loss": 0.6920541524887085, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.007332286797463894, | |
| "rewards/margins": 0.004986848682165146, | |
| "rewards/rejected": -0.012319136410951614, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.8626373626373627, | |
| "grad_norm": 1.2568283081054688, | |
| "learning_rate": 3.9877300613496937e-07, | |
| "logits/chosen": -0.39413487911224365, | |
| "logits/rejected": -0.3989291191101074, | |
| "logps/chosen": -1560.168212890625, | |
| "logps/rejected": -1546.84326171875, | |
| "loss": 0.6931201219558716, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.03144290670752525, | |
| "rewards/margins": -0.005113277584314346, | |
| "rewards/rejected": -0.026329632848501205, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.8681318681318682, | |
| "grad_norm": 0.5363301634788513, | |
| "learning_rate": 3.8343558282208593e-07, | |
| "logits/chosen": -0.32399898767471313, | |
| "logits/rejected": -0.2612525522708893, | |
| "logps/chosen": -1289.2220458984375, | |
| "logps/rejected": -1368.276123046875, | |
| "loss": 0.6930078268051147, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.020566385239362717, | |
| "rewards/margins": 0.007330484688282013, | |
| "rewards/rejected": -0.02789686992764473, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.8736263736263736, | |
| "grad_norm": 1.1271432638168335, | |
| "learning_rate": 3.680981595092025e-07, | |
| "logits/chosen": -0.8580625057220459, | |
| "logits/rejected": -0.8349483013153076, | |
| "logps/chosen": -1564.2672119140625, | |
| "logps/rejected": -1513.3536376953125, | |
| "loss": 0.6922256946563721, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.021702442318201065, | |
| "rewards/margins": 0.0017409119755029678, | |
| "rewards/rejected": -0.023443354293704033, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.8791208791208791, | |
| "grad_norm": 1.249580979347229, | |
| "learning_rate": 3.52760736196319e-07, | |
| "logits/chosen": -0.24156060814857483, | |
| "logits/rejected": -0.3282901644706726, | |
| "logps/chosen": -1468.79931640625, | |
| "logps/rejected": -1360.5396728515625, | |
| "loss": 0.6932394504547119, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.016846224665641785, | |
| "rewards/margins": -0.0005967399920336902, | |
| "rewards/rejected": -0.016249485313892365, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.8846153846153846, | |
| "grad_norm": 1.7817763090133667, | |
| "learning_rate": 3.3742331288343563e-07, | |
| "logits/chosen": -0.4720512926578522, | |
| "logits/rejected": -0.47025078535079956, | |
| "logps/chosen": -1514.5360107421875, | |
| "logps/rejected": -1491.627197265625, | |
| "loss": 0.6926375031471252, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.020506229251623154, | |
| "rewards/margins": 0.008882999420166016, | |
| "rewards/rejected": -0.02938922867178917, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.8901098901098901, | |
| "grad_norm": 0.959372878074646, | |
| "learning_rate": 3.220858895705522e-07, | |
| "logits/chosen": -0.35722965002059937, | |
| "logits/rejected": -0.41712379455566406, | |
| "logps/chosen": -1411.179931640625, | |
| "logps/rejected": -1289.341796875, | |
| "loss": 0.6943350434303284, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.024521712213754654, | |
| "rewards/margins": -0.0049790311604738235, | |
| "rewards/rejected": -0.01954268291592598, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.8956043956043956, | |
| "grad_norm": 1.2075046300888062, | |
| "learning_rate": 3.0674846625766876e-07, | |
| "logits/chosen": -0.8041161298751831, | |
| "logits/rejected": -0.8109002113342285, | |
| "logps/chosen": -1505.929931640625, | |
| "logps/rejected": -1477.200927734375, | |
| "loss": 0.6926769614219666, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.028787925839424133, | |
| "rewards/margins": -0.00552023109048605, | |
| "rewards/rejected": -0.023267695680260658, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.9010989010989011, | |
| "grad_norm": 1.387386441230774, | |
| "learning_rate": 2.9141104294478527e-07, | |
| "logits/chosen": -0.44806283712387085, | |
| "logits/rejected": -0.61948162317276, | |
| "logps/chosen": -1527.060302734375, | |
| "logps/rejected": -1494.800537109375, | |
| "loss": 0.692619264125824, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.011695308610796928, | |
| "rewards/margins": -0.0015909149078652263, | |
| "rewards/rejected": -0.01010439358651638, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.9065934065934066, | |
| "grad_norm": 1.484518051147461, | |
| "learning_rate": 2.760736196319019e-07, | |
| "logits/chosen": -0.622553288936615, | |
| "logits/rejected": -0.7360720634460449, | |
| "logps/chosen": -1395.18798828125, | |
| "logps/rejected": -1419.4432373046875, | |
| "loss": 0.6941399574279785, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.008678913116455078, | |
| "rewards/margins": 0.001724362256936729, | |
| "rewards/rejected": -0.010403275489807129, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.9120879120879121, | |
| "grad_norm": 0.7226335406303406, | |
| "learning_rate": 2.607361963190184e-07, | |
| "logits/chosen": -0.43059980869293213, | |
| "logits/rejected": -0.5786530375480652, | |
| "logps/chosen": -1415.7808837890625, | |
| "logps/rejected": -1407.481201171875, | |
| "loss": 0.6924831867218018, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.02000749483704567, | |
| "rewards/margins": -0.002660846570506692, | |
| "rewards/rejected": -0.01734665036201477, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.9175824175824175, | |
| "grad_norm": 1.1836820840835571, | |
| "learning_rate": 2.45398773006135e-07, | |
| "logits/chosen": -0.3440017104148865, | |
| "logits/rejected": -0.3202272355556488, | |
| "logps/chosen": -1425.4095458984375, | |
| "logps/rejected": -1382.1885986328125, | |
| "loss": 0.6946048140525818, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.013712071813642979, | |
| "rewards/margins": -0.006773996166884899, | |
| "rewards/rejected": -0.0069380756467580795, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 2.162140369415283, | |
| "learning_rate": 2.3006134969325155e-07, | |
| "logits/chosen": -0.6371763944625854, | |
| "logits/rejected": -0.7157647609710693, | |
| "logps/chosen": -1485.0994873046875, | |
| "logps/rejected": -1396.60693359375, | |
| "loss": 0.6927782297134399, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.014371508732438087, | |
| "rewards/margins": 0.0014920367393642664, | |
| "rewards/rejected": -0.01586354523897171, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.9285714285714286, | |
| "grad_norm": 1.5967764854431152, | |
| "learning_rate": 2.1472392638036812e-07, | |
| "logits/chosen": -0.5323659777641296, | |
| "logits/rejected": -0.5269442200660706, | |
| "logps/chosen": -1347.4677734375, | |
| "logps/rejected": -1475.41064453125, | |
| "loss": 0.6943572163581848, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.0019814111292362213, | |
| "rewards/margins": -0.00141085684299469, | |
| "rewards/rejected": -0.0005705545772798359, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.9340659340659341, | |
| "grad_norm": 1.2801460027694702, | |
| "learning_rate": 1.9938650306748468e-07, | |
| "logits/chosen": -0.7320770621299744, | |
| "logits/rejected": -0.7078484892845154, | |
| "logps/chosen": -1049.31298828125, | |
| "logps/rejected": -1216.61083984375, | |
| "loss": 0.6933168172836304, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.009347095154225826, | |
| "rewards/margins": -0.0006960107129998505, | |
| "rewards/rejected": -0.008651085197925568, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.9395604395604396, | |
| "grad_norm": 1.4801154136657715, | |
| "learning_rate": 1.8404907975460125e-07, | |
| "logits/chosen": -0.8624351620674133, | |
| "logits/rejected": -0.6278290748596191, | |
| "logps/chosen": -1554.202392578125, | |
| "logps/rejected": -1768.3101806640625, | |
| "loss": 0.6939010620117188, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.011780891567468643, | |
| "rewards/margins": -0.0007463262882083654, | |
| "rewards/rejected": -0.011034565046429634, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.945054945054945, | |
| "grad_norm": 1.2581945657730103, | |
| "learning_rate": 1.6871165644171781e-07, | |
| "logits/chosen": -0.8042242527008057, | |
| "logits/rejected": -0.8509213328361511, | |
| "logps/chosen": -1742.9061279296875, | |
| "logps/rejected": -1663.30078125, | |
| "loss": 0.6920545101165771, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.009515208192169666, | |
| "rewards/margins": 0.006152589339762926, | |
| "rewards/rejected": -0.01566779799759388, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.9505494505494505, | |
| "grad_norm": 1.6917072534561157, | |
| "learning_rate": 1.5337423312883438e-07, | |
| "logits/chosen": -0.677159309387207, | |
| "logits/rejected": -0.708314836025238, | |
| "logps/chosen": -1458.487548828125, | |
| "logps/rejected": -1540.4312744140625, | |
| "loss": 0.6939187049865723, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.008841114118695259, | |
| "rewards/margins": -0.0041299439035356045, | |
| "rewards/rejected": -0.004711170680820942, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.9560439560439561, | |
| "grad_norm": 1.562403917312622, | |
| "learning_rate": 1.3803680981595094e-07, | |
| "logits/chosen": -0.8315936923027039, | |
| "logits/rejected": -0.8156816959381104, | |
| "logps/chosen": -1405.054931640625, | |
| "logps/rejected": -1378.0836181640625, | |
| "loss": 0.6930129528045654, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.008151168935000896, | |
| "rewards/margins": 0.0004372410476207733, | |
| "rewards/rejected": -0.008588409051299095, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.9615384615384616, | |
| "grad_norm": 1.8198566436767578, | |
| "learning_rate": 1.226993865030675e-07, | |
| "logits/chosen": -0.35468000173568726, | |
| "logits/rejected": -0.46720781922340393, | |
| "logps/chosen": -1518.917236328125, | |
| "logps/rejected": -1435.27783203125, | |
| "loss": 0.6903528571128845, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.005114269442856312, | |
| "rewards/margins": 0.011224641464650631, | |
| "rewards/rejected": -0.006110372021794319, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.967032967032967, | |
| "grad_norm": 2.1096837520599365, | |
| "learning_rate": 1.0736196319018406e-07, | |
| "logits/chosen": -0.6721988320350647, | |
| "logits/rejected": -0.6404861211776733, | |
| "logps/chosen": -1544.8468017578125, | |
| "logps/rejected": -1587.005615234375, | |
| "loss": 0.6921041011810303, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.010166358202695847, | |
| "rewards/margins": 0.003511047223582864, | |
| "rewards/rejected": -0.013677406124770641, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.9725274725274725, | |
| "grad_norm": 1.5950981378555298, | |
| "learning_rate": 9.202453987730062e-08, | |
| "logits/chosen": -0.867381751537323, | |
| "logits/rejected": -0.753036379814148, | |
| "logps/chosen": -1559.811767578125, | |
| "logps/rejected": -1612.46826171875, | |
| "loss": 0.6940016150474548, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.014099950902163982, | |
| "rewards/margins": -0.0024051289074122906, | |
| "rewards/rejected": -0.011694822460412979, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.978021978021978, | |
| "grad_norm": 1.7663509845733643, | |
| "learning_rate": 7.668711656441719e-08, | |
| "logits/chosen": -0.49028313159942627, | |
| "logits/rejected": -0.6348991990089417, | |
| "logps/chosen": -1656.97216796875, | |
| "logps/rejected": -1574.3087158203125, | |
| "loss": 0.6930438876152039, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.0007067109690979123, | |
| "rewards/margins": 0.002308547031134367, | |
| "rewards/rejected": -0.0030152583494782448, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.9835164835164835, | |
| "grad_norm": 2.180283308029175, | |
| "learning_rate": 6.134969325153375e-08, | |
| "logits/chosen": -0.3077111840248108, | |
| "logits/rejected": -0.3495293855667114, | |
| "logps/chosen": -1459.8404541015625, | |
| "logps/rejected": -1447.5089111328125, | |
| "loss": 0.6924480199813843, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.008424444124102592, | |
| "rewards/margins": 0.0035952467005699873, | |
| "rewards/rejected": -0.012019690126180649, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.989010989010989, | |
| "grad_norm": 1.005254864692688, | |
| "learning_rate": 4.601226993865031e-08, | |
| "logits/chosen": -0.6914213299751282, | |
| "logits/rejected": -0.6896047592163086, | |
| "logps/chosen": -1281.80419921875, | |
| "logps/rejected": -1390.142578125, | |
| "loss": 0.692180871963501, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.00413126964122057, | |
| "rewards/margins": 0.0011504649883136153, | |
| "rewards/rejected": -0.0052817342802882195, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.9945054945054945, | |
| "grad_norm": 1.4081807136535645, | |
| "learning_rate": 3.067484662576688e-08, | |
| "logits/chosen": -0.8277162313461304, | |
| "logits/rejected": -0.8068577647209167, | |
| "logps/chosen": -1470.6419677734375, | |
| "logps/rejected": -1466.707763671875, | |
| "loss": 0.6934786438941956, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.0031227758154273033, | |
| "rewards/margins": -0.004466504789888859, | |
| "rewards/rejected": 0.0013437292072921991, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.1653679609298706, | |
| "learning_rate": 1.533742331288344e-08, | |
| "logits/chosen": -0.3629406690597534, | |
| "logits/rejected": -0.33546456694602966, | |
| "logps/chosen": -1682.532470703125, | |
| "logps/rejected": -1659.32470703125, | |
| "loss": 0.6933432221412659, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.00286815594881773, | |
| "rewards/margins": 0.000943202874623239, | |
| "rewards/rejected": 0.0019249536562711, | |
| "step": 182 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 182, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |