Instructions to use thavens/q3_8b_secalign_xml with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use thavens/q3_8b_secalign_xml with Transformers:
# Load model directly from transformers import AutoModel model = AutoModel.from_pretrained("thavens/q3_8b_secalign_xml", dtype="auto") - Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00333889816360601, | |
| "grad_norm": 0.8906780481338501, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -2.6484375, | |
| "logits/rejected": -2.7734375, | |
| "logps/chosen": -182.0, | |
| "logps/rejected": -244.5, | |
| "loss": 1.5239, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": 0.02294921875, | |
| "rewards/margins": -0.33056640625, | |
| "rewards/rejected": 0.3515625, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.00667779632721202, | |
| "grad_norm": 0.8854408860206604, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "logits/chosen": -3.2109375, | |
| "logits/rejected": -2.6640625, | |
| "logps/chosen": -133.5, | |
| "logps/rejected": -258.5, | |
| "loss": 1.103, | |
| "rewards/accuracies": 0.546875, | |
| "rewards/chosen": -0.0283203125, | |
| "rewards/margins": 0.0703125, | |
| "rewards/rejected": -0.09814453125, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.01001669449081803, | |
| "grad_norm": 1.1921889781951904, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "logits/chosen": -3.0, | |
| "logits/rejected": -2.6640625, | |
| "logps/chosen": -130.0, | |
| "logps/rejected": -226.0, | |
| "loss": 1.5938, | |
| "rewards/accuracies": 0.28125, | |
| "rewards/chosen": -0.44921875, | |
| "rewards/margins": -0.763671875, | |
| "rewards/rejected": 0.3125, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.01335559265442404, | |
| "grad_norm": 0.9537683725357056, | |
| "learning_rate": 4.8e-05, | |
| "logits/chosen": -2.890625, | |
| "logits/rejected": -2.8125, | |
| "logps/chosen": -147.5, | |
| "logps/rejected": -281.0, | |
| "loss": 1.0503, | |
| "rewards/accuracies": 0.578125, | |
| "rewards/chosen": 0.02099609375, | |
| "rewards/margins": 0.28369140625, | |
| "rewards/rejected": -0.26171875, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01669449081803005, | |
| "grad_norm": 1.2217826843261719, | |
| "learning_rate": 6.400000000000001e-05, | |
| "logits/chosen": -2.90625, | |
| "logits/rejected": -2.7734375, | |
| "logps/chosen": -135.75, | |
| "logps/rejected": -232.5, | |
| "loss": 1.3145, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.1611328125, | |
| "rewards/margins": 0.0048828125, | |
| "rewards/rejected": 0.15606689453125, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.02003338898163606, | |
| "grad_norm": 1.0575661659240723, | |
| "learning_rate": 8e-05, | |
| "logits/chosen": -3.0, | |
| "logits/rejected": -2.640625, | |
| "logps/chosen": -142.5, | |
| "logps/rejected": -260.5, | |
| "loss": 1.1968, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.104736328125, | |
| "rewards/margins": -0.081298828125, | |
| "rewards/rejected": 0.185546875, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.02337228714524207, | |
| "grad_norm": 1.3832917213439941, | |
| "learning_rate": 9.6e-05, | |
| "logits/chosen": -3.140625, | |
| "logits/rejected": -2.9375, | |
| "logps/chosen": -119.25, | |
| "logps/rejected": -241.0, | |
| "loss": 1.1948, | |
| "rewards/accuracies": 0.390625, | |
| "rewards/chosen": 0.16455078125, | |
| "rewards/margins": -0.1988525390625, | |
| "rewards/rejected": 0.36328125, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.02671118530884808, | |
| "grad_norm": 0.9630353450775146, | |
| "learning_rate": 0.000112, | |
| "logits/chosen": -2.7734375, | |
| "logits/rejected": -2.625, | |
| "logps/chosen": -159.5, | |
| "logps/rejected": -256.0, | |
| "loss": 0.8555, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": 0.0166015625, | |
| "rewards/margins": 1.04296875, | |
| "rewards/rejected": -1.02734375, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.03005008347245409, | |
| "grad_norm": 1.3834831714630127, | |
| "learning_rate": 0.00012800000000000002, | |
| "logits/chosen": -2.921875, | |
| "logits/rejected": -2.703125, | |
| "logps/chosen": -143.5, | |
| "logps/rejected": -224.5, | |
| "loss": 1.1067, | |
| "rewards/accuracies": 0.546875, | |
| "rewards/chosen": 0.2060546875, | |
| "rewards/margins": 0.4296875, | |
| "rewards/rejected": -0.222686767578125, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0333889816360601, | |
| "grad_norm": 1.5977782011032104, | |
| "learning_rate": 0.000144, | |
| "logits/chosen": -2.7890625, | |
| "logits/rejected": -2.7890625, | |
| "logps/chosen": -160.0, | |
| "logps/rejected": -271.0, | |
| "loss": 0.896, | |
| "rewards/accuracies": 0.640625, | |
| "rewards/chosen": 0.0777587890625, | |
| "rewards/margins": 0.669921875, | |
| "rewards/rejected": -0.5927734375, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03672787979966611, | |
| "grad_norm": 0.9674336910247803, | |
| "learning_rate": 0.00016, | |
| "logits/chosen": -2.484375, | |
| "logits/rejected": -2.734375, | |
| "logps/chosen": -182.0, | |
| "logps/rejected": -217.0, | |
| "loss": 0.4347, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.208984375, | |
| "rewards/margins": 1.55078125, | |
| "rewards/rejected": -1.34375, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.04006677796327212, | |
| "grad_norm": 1.0390831232070923, | |
| "learning_rate": 0.00015999950159857832, | |
| "logits/chosen": -2.953125, | |
| "logits/rejected": -2.78125, | |
| "logps/chosen": -149.0, | |
| "logps/rejected": -259.5, | |
| "loss": 0.3765, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.3333740234375, | |
| "rewards/margins": 2.6015625, | |
| "rewards/rejected": -2.265625, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.04340567612687813, | |
| "grad_norm": 0.7539263963699341, | |
| "learning_rate": 0.00015999800640052332, | |
| "logits/chosen": -2.9609375, | |
| "logits/rejected": -2.734375, | |
| "logps/chosen": -164.5, | |
| "logps/rejected": -295.0, | |
| "loss": 0.2193, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.7236328125, | |
| "rewards/margins": 4.15625, | |
| "rewards/rejected": -3.4296875, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.04674457429048414, | |
| "grad_norm": 0.8638622760772705, | |
| "learning_rate": 0.00015999551442446528, | |
| "logits/chosen": -3.2890625, | |
| "logits/rejected": -2.515625, | |
| "logps/chosen": -144.0, | |
| "logps/rejected": -289.0, | |
| "loss": 0.2724, | |
| "rewards/accuracies": 0.921875, | |
| "rewards/chosen": 0.4716796875, | |
| "rewards/margins": 4.484375, | |
| "rewards/rejected": -4.015625, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.05008347245409015, | |
| "grad_norm": 0.5347347855567932, | |
| "learning_rate": 0.00015999202570145425, | |
| "logits/chosen": -3.2734375, | |
| "logits/rejected": -2.5546875, | |
| "logps/chosen": -113.0, | |
| "logps/rejected": -288.0, | |
| "loss": 0.1353, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.955078125, | |
| "rewards/margins": 5.78125, | |
| "rewards/rejected": -4.828125, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.05342237061769616, | |
| "grad_norm": 0.5463722348213196, | |
| "learning_rate": 0.0001599875402749599, | |
| "logits/chosen": -3.359375, | |
| "logits/rejected": -2.7734375, | |
| "logps/chosen": -138.0, | |
| "logps/rejected": -241.0, | |
| "loss": 0.1262, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.4609375, | |
| "rewards/margins": 6.0625, | |
| "rewards/rejected": -4.609375, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.05676126878130217, | |
| "grad_norm": 0.6224486231803894, | |
| "learning_rate": 0.00015998205820087077, | |
| "logits/chosen": -3.625, | |
| "logits/rejected": -2.78125, | |
| "logps/chosen": -105.0, | |
| "logps/rejected": -271.0, | |
| "loss": 0.1513, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.890625, | |
| "rewards/margins": 7.015625, | |
| "rewards/rejected": -5.125, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.06010016694490818, | |
| "grad_norm": 0.31991323828697205, | |
| "learning_rate": 0.00015997557954749368, | |
| "logits/chosen": -3.71875, | |
| "logits/rejected": -2.6171875, | |
| "logps/chosen": -113.5, | |
| "logps/rejected": -292.0, | |
| "loss": 0.0388, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.6953125, | |
| "rewards/margins": 8.3125, | |
| "rewards/rejected": -5.625, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.06343906510851419, | |
| "grad_norm": 0.1611785888671875, | |
| "learning_rate": 0.00015996810439555294, | |
| "logits/chosen": -3.7578125, | |
| "logits/rejected": -2.9140625, | |
| "logps/chosen": -121.25, | |
| "logps/rejected": -330.0, | |
| "loss": 0.0091, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.328125, | |
| "rewards/margins": 8.875, | |
| "rewards/rejected": -6.515625, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0667779632721202, | |
| "grad_norm": 0.2885970175266266, | |
| "learning_rate": 0.00015995963283818918, | |
| "logits/chosen": -4.03125, | |
| "logits/rejected": -2.6953125, | |
| "logps/chosen": -117.75, | |
| "logps/rejected": -312.0, | |
| "loss": 0.0381, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 2.296875, | |
| "rewards/margins": 8.28125, | |
| "rewards/rejected": -6.0, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07011686143572621, | |
| "grad_norm": 0.1798153966665268, | |
| "learning_rate": 0.00015995016498095827, | |
| "logits/chosen": -3.6953125, | |
| "logits/rejected": -2.9453125, | |
| "logps/chosen": -124.0, | |
| "logps/rejected": -300.0, | |
| "loss": 0.0241, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 2.921875, | |
| "rewards/margins": 9.875, | |
| "rewards/rejected": -6.9375, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.07345575959933222, | |
| "grad_norm": 0.04417094215750694, | |
| "learning_rate": 0.0001599397009418301, | |
| "logits/chosen": -3.7421875, | |
| "logits/rejected": -2.703125, | |
| "logps/chosen": -129.5, | |
| "logps/rejected": -347.0, | |
| "loss": 0.0048, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.4921875, | |
| "rewards/margins": 10.15625, | |
| "rewards/rejected": -7.671875, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.07679465776293823, | |
| "grad_norm": 0.0934915617108345, | |
| "learning_rate": 0.00015992824085118694, | |
| "logits/chosen": -3.421875, | |
| "logits/rejected": -2.9765625, | |
| "logps/chosen": -108.0, | |
| "logps/rejected": -303.0, | |
| "loss": 0.008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8359375, | |
| "rewards/margins": 10.65625, | |
| "rewards/rejected": -7.8125, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.08013355592654424, | |
| "grad_norm": 0.03649423271417618, | |
| "learning_rate": 0.00015991578485182194, | |
| "logits/chosen": -3.96875, | |
| "logits/rejected": -2.7890625, | |
| "logps/chosen": -100.25, | |
| "logps/rejected": -345.0, | |
| "loss": 0.0028, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.4296875, | |
| "rewards/margins": 12.0, | |
| "rewards/rejected": -8.59375, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.08347245409015025, | |
| "grad_norm": 0.29851219058036804, | |
| "learning_rate": 0.00015990233309893726, | |
| "logits/chosen": -3.8203125, | |
| "logits/rejected": -3.1484375, | |
| "logps/chosen": -110.25, | |
| "logps/rejected": -313.0, | |
| "loss": 0.0403, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 3.0703125, | |
| "rewards/margins": 10.59375, | |
| "rewards/rejected": -7.5, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.08681135225375626, | |
| "grad_norm": 0.09218787401914597, | |
| "learning_rate": 0.00015988788576014228, | |
| "logits/chosen": -4.1796875, | |
| "logits/rejected": -3.015625, | |
| "logps/chosen": -113.25, | |
| "logps/rejected": -331.0, | |
| "loss": 0.0081, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.2578125, | |
| "rewards/margins": 12.0, | |
| "rewards/rejected": -8.75, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.09015025041736227, | |
| "grad_norm": 0.010930394753813744, | |
| "learning_rate": 0.0001598724430154513, | |
| "logits/chosen": -4.625, | |
| "logits/rejected": -3.0625, | |
| "logps/chosen": -75.0, | |
| "logps/rejected": -328.0, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.5, | |
| "rewards/margins": 12.875, | |
| "rewards/rejected": -9.375, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.09348914858096828, | |
| "grad_norm": 0.025569891557097435, | |
| "learning_rate": 0.00015985600505728152, | |
| "logits/chosen": -4.5625, | |
| "logits/rejected": -3.1015625, | |
| "logps/chosen": -81.75, | |
| "logps/rejected": -301.0, | |
| "loss": 0.0019, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8828125, | |
| "rewards/margins": 11.96875, | |
| "rewards/rejected": -9.09375, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.09682804674457429, | |
| "grad_norm": 0.00938540231436491, | |
| "learning_rate": 0.00015983857209045046, | |
| "logits/chosen": -3.8984375, | |
| "logits/rejected": -3.03125, | |
| "logps/chosen": -154.0, | |
| "logps/rejected": -353.0, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.40625, | |
| "rewards/margins": 12.96875, | |
| "rewards/rejected": -9.5625, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.1001669449081803, | |
| "grad_norm": 0.00449951458722353, | |
| "learning_rate": 0.00015982014433217346, | |
| "logits/chosen": -4.359375, | |
| "logits/rejected": -3.15625, | |
| "logps/chosen": -110.25, | |
| "logps/rejected": -349.0, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.6875, | |
| "rewards/margins": 14.40625, | |
| "rewards/rejected": -10.6875, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.10350584307178631, | |
| "grad_norm": 0.1915261447429657, | |
| "learning_rate": 0.0001598007220120611, | |
| "logits/chosen": -4.546875, | |
| "logits/rejected": -3.09375, | |
| "logps/chosen": -108.25, | |
| "logps/rejected": -386.0, | |
| "loss": 0.006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.6953125, | |
| "rewards/margins": 15.75, | |
| "rewards/rejected": -12.0625, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.10684474123539232, | |
| "grad_norm": 0.015207415446639061, | |
| "learning_rate": 0.0001597803053721162, | |
| "logits/chosen": -4.921875, | |
| "logits/rejected": -3.2734375, | |
| "logps/chosen": -102.25, | |
| "logps/rejected": -355.0, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0859375, | |
| "rewards/margins": 13.65625, | |
| "rewards/rejected": -10.5625, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.11018363939899833, | |
| "grad_norm": 0.052224867045879364, | |
| "learning_rate": 0.00015975889466673073, | |
| "logits/chosen": -4.84375, | |
| "logits/rejected": -3.2265625, | |
| "logps/chosen": -107.75, | |
| "logps/rejected": -387.0, | |
| "loss": 0.0021, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1796875, | |
| "rewards/margins": 15.375, | |
| "rewards/rejected": -12.21875, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.11352253756260434, | |
| "grad_norm": 0.018112409859895706, | |
| "learning_rate": 0.0001597364901626829, | |
| "logits/chosen": -4.640625, | |
| "logits/rejected": -3.3828125, | |
| "logps/chosen": -121.5, | |
| "logps/rejected": -367.0, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.53125, | |
| "rewards/margins": 15.65625, | |
| "rewards/rejected": -12.125, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.11686143572621036, | |
| "grad_norm": 0.03322592005133629, | |
| "learning_rate": 0.00015971309213913366, | |
| "logits/chosen": -4.984375, | |
| "logits/rejected": -3.4296875, | |
| "logps/chosen": -89.75, | |
| "logps/rejected": -365.0, | |
| "loss": 0.0012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.5078125, | |
| "rewards/margins": 15.53125, | |
| "rewards/rejected": -12.0625, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.12020033388981637, | |
| "grad_norm": 0.051106907427310944, | |
| "learning_rate": 0.00015968870088762315, | |
| "logits/chosen": -4.609375, | |
| "logits/rejected": -3.28125, | |
| "logps/chosen": -98.75, | |
| "logps/rejected": -359.0, | |
| "loss": 0.0032, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.296875, | |
| "rewards/margins": 16.15625, | |
| "rewards/rejected": -12.90625, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.12353923205342238, | |
| "grad_norm": 0.040621671825647354, | |
| "learning_rate": 0.00015966331671206724, | |
| "logits/chosen": -4.4375, | |
| "logits/rejected": -3.2421875, | |
| "logps/chosen": -120.0, | |
| "logps/rejected": -406.0, | |
| "loss": 0.0023, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.8671875, | |
| "rewards/margins": 17.4375, | |
| "rewards/rejected": -13.625, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.12687813021702837, | |
| "grad_norm": 0.053807105869054794, | |
| "learning_rate": 0.00015963693992875367, | |
| "logits/chosen": -4.546875, | |
| "logits/rejected": -3.3203125, | |
| "logps/chosen": -117.0, | |
| "logps/rejected": -378.0, | |
| "loss": 0.0016, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.3984375, | |
| "rewards/margins": 17.0625, | |
| "rewards/rejected": -13.6875, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.1302170283806344, | |
| "grad_norm": 0.008678439073264599, | |
| "learning_rate": 0.00015960957086633812, | |
| "logits/chosen": -4.6875, | |
| "logits/rejected": -3.5078125, | |
| "logps/chosen": -108.0, | |
| "logps/rejected": -368.0, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.8515625, | |
| "rewards/margins": 18.6875, | |
| "rewards/rejected": -14.84375, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.1335559265442404, | |
| "grad_norm": 0.05349210277199745, | |
| "learning_rate": 0.00015958120986584007, | |
| "logits/chosen": -4.5625, | |
| "logits/rejected": -3.3203125, | |
| "logps/chosen": -121.0, | |
| "logps/rejected": -348.0, | |
| "loss": 0.0026, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1953125, | |
| "rewards/margins": 16.15625, | |
| "rewards/rejected": -13.0, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.13689482470784642, | |
| "grad_norm": 0.01919226534664631, | |
| "learning_rate": 0.00015955185728063859, | |
| "logits/chosen": -4.671875, | |
| "logits/rejected": -3.65625, | |
| "logps/chosen": -111.75, | |
| "logps/rejected": -348.0, | |
| "loss": 0.0012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.3515625, | |
| "rewards/margins": 16.0, | |
| "rewards/rejected": -12.625, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.14023372287145242, | |
| "grad_norm": 0.003901825286448002, | |
| "learning_rate": 0.0001595215134764679, | |
| "logits/chosen": -4.890625, | |
| "logits/rejected": -3.484375, | |
| "logps/chosen": -96.75, | |
| "logps/rejected": -401.0, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0390625, | |
| "rewards/margins": 17.4375, | |
| "rewards/rejected": -14.40625, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.14357262103505844, | |
| "grad_norm": 1.0987324714660645, | |
| "learning_rate": 0.00015949017883141293, | |
| "logits/chosen": -4.4375, | |
| "logits/rejected": -3.5625, | |
| "logps/chosen": -132.25, | |
| "logps/rejected": -354.0, | |
| "loss": 1.1098, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 2.125, | |
| "rewards/margins": 16.625, | |
| "rewards/rejected": -14.4375, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.14691151919866444, | |
| "grad_norm": 0.0008750148699618876, | |
| "learning_rate": 0.00015945785373590446, | |
| "logits/chosen": -4.78125, | |
| "logits/rejected": -3.578125, | |
| "logps/chosen": -112.75, | |
| "logps/rejected": -404.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.40625, | |
| "rewards/margins": 20.1875, | |
| "rewards/rejected": -16.8125, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.15025041736227046, | |
| "grad_norm": 0.00039893100620247424, | |
| "learning_rate": 0.0001594245385927143, | |
| "logits/chosen": -4.796875, | |
| "logits/rejected": -3.6875, | |
| "logps/chosen": -109.0, | |
| "logps/rejected": -436.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.90625, | |
| "rewards/margins": 21.0625, | |
| "rewards/rejected": -17.25, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.15358931552587646, | |
| "grad_norm": 3.564608414308168e-05, | |
| "learning_rate": 0.00015939023381695034, | |
| "logits/chosen": -5.015625, | |
| "logits/rejected": -3.5546875, | |
| "logps/chosen": -122.0, | |
| "logps/rejected": -404.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.9453125, | |
| "rewards/margins": 20.5, | |
| "rewards/rejected": -16.5625, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.15692821368948248, | |
| "grad_norm": 0.15678206086158752, | |
| "learning_rate": 0.0001593549398360513, | |
| "logits/chosen": -5.25, | |
| "logits/rejected": -3.546875, | |
| "logps/chosen": -103.75, | |
| "logps/rejected": -430.0, | |
| "loss": 0.0052, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.765625, | |
| "rewards/margins": 21.25, | |
| "rewards/rejected": -17.5, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.16026711185308848, | |
| "grad_norm": 0.003343533491715789, | |
| "learning_rate": 0.00015931865708978144, | |
| "logits/chosen": -4.828125, | |
| "logits/rejected": -3.75, | |
| "logps/chosen": -115.25, | |
| "logps/rejected": -455.0, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.46875, | |
| "rewards/margins": 22.1875, | |
| "rewards/rejected": -18.75, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1636060100166945, | |
| "grad_norm": 0.0013559595681726933, | |
| "learning_rate": 0.0001592813860302251, | |
| "logits/chosen": -4.78125, | |
| "logits/rejected": -3.515625, | |
| "logps/chosen": -98.0, | |
| "logps/rejected": -460.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1484375, | |
| "rewards/margins": 22.1875, | |
| "rewards/rejected": -19.0, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.1669449081803005, | |
| "grad_norm": 0.010648728348314762, | |
| "learning_rate": 0.00015924312712178095, | |
| "logits/chosen": -5.34375, | |
| "logits/rejected": -3.6953125, | |
| "logps/chosen": -101.5, | |
| "logps/rejected": -440.0, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.828125, | |
| "rewards/margins": 21.875, | |
| "rewards/rejected": -19.0625, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.17028380634390652, | |
| "grad_norm": 0.0414469912648201, | |
| "learning_rate": 0.00015920388084115635, | |
| "logits/chosen": -4.78125, | |
| "logits/rejected": -3.6484375, | |
| "logps/chosen": -111.0, | |
| "logps/rejected": -429.0, | |
| "loss": 0.0012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.6484375, | |
| "rewards/margins": 22.875, | |
| "rewards/rejected": -19.25, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.17362270450751252, | |
| "grad_norm": 0.0018979490268975496, | |
| "learning_rate": 0.00015916364767736143, | |
| "logits/chosen": -5.078125, | |
| "logits/rejected": -3.546875, | |
| "logps/chosen": -132.25, | |
| "logps/rejected": -401.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0, | |
| "rewards/margins": 19.875, | |
| "rewards/rejected": -16.875, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.17696160267111852, | |
| "grad_norm": 1.743229768180754e-05, | |
| "learning_rate": 0.00015912242813170274, | |
| "logits/chosen": -4.984375, | |
| "logits/rejected": -3.7265625, | |
| "logps/chosen": -125.25, | |
| "logps/rejected": -457.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.4140625, | |
| "rewards/margins": 23.25, | |
| "rewards/rejected": -19.875, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.18030050083472454, | |
| "grad_norm": 0.13024184107780457, | |
| "learning_rate": 0.00015908022271777733, | |
| "logits/chosen": -5.515625, | |
| "logits/rejected": -3.796875, | |
| "logps/chosen": -83.0, | |
| "logps/rejected": -467.0, | |
| "loss": 0.0043, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.3828125, | |
| "rewards/margins": 24.0, | |
| "rewards/rejected": -20.625, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.18363939899833054, | |
| "grad_norm": 0.0001471200812375173, | |
| "learning_rate": 0.0001590370319614662, | |
| "logits/chosen": -4.859375, | |
| "logits/rejected": -3.703125, | |
| "logps/chosen": -111.5, | |
| "logps/rejected": -470.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.546875, | |
| "rewards/margins": 24.5625, | |
| "rewards/rejected": -21.0, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.18697829716193656, | |
| "grad_norm": 0.0001591207692399621, | |
| "learning_rate": 0.00015899285640092763, | |
| "logits/chosen": -5.046875, | |
| "logits/rejected": -3.8046875, | |
| "logps/chosen": -115.75, | |
| "logps/rejected": -455.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.703125, | |
| "rewards/margins": 23.625, | |
| "rewards/rejected": -19.875, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.19031719532554256, | |
| "grad_norm": 0.0009253205498680472, | |
| "learning_rate": 0.00015894769658659073, | |
| "logits/chosen": -4.65625, | |
| "logits/rejected": -3.65625, | |
| "logps/chosen": -122.25, | |
| "logps/rejected": -446.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.40625, | |
| "rewards/margins": 22.0625, | |
| "rewards/rejected": -18.5625, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.19365609348914858, | |
| "grad_norm": 4.7780202294234186e-05, | |
| "learning_rate": 0.00015890155308114837, | |
| "logits/chosen": -5.234375, | |
| "logits/rejected": -3.4765625, | |
| "logps/chosen": -113.75, | |
| "logps/rejected": -467.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.6640625, | |
| "rewards/margins": 24.1875, | |
| "rewards/rejected": -20.5, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.19699499165275458, | |
| "grad_norm": 3.8026719266781583e-05, | |
| "learning_rate": 0.00015885442645955026, | |
| "logits/chosen": -5.0625, | |
| "logits/rejected": -3.890625, | |
| "logps/chosen": -119.75, | |
| "logps/rejected": -399.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.171875, | |
| "rewards/margins": 22.9375, | |
| "rewards/rejected": -18.75, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.2003338898163606, | |
| "grad_norm": 0.00020460848463699222, | |
| "learning_rate": 0.00015880631730899578, | |
| "logits/chosen": -4.5859375, | |
| "logits/rejected": -3.53125, | |
| "logps/chosen": -99.0, | |
| "logps/rejected": -449.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.6328125, | |
| "rewards/margins": 22.875, | |
| "rewards/rejected": -19.25, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2036727879799666, | |
| "grad_norm": 0.00048449577298015356, | |
| "learning_rate": 0.0001587572262289267, | |
| "logits/chosen": -5.546875, | |
| "logits/rejected": -3.9453125, | |
| "logps/chosen": -87.5, | |
| "logps/rejected": -464.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.96875, | |
| "rewards/margins": 24.4375, | |
| "rewards/rejected": -21.4375, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.20701168614357263, | |
| "grad_norm": 0.14330030977725983, | |
| "learning_rate": 0.00015870715383101955, | |
| "logits/chosen": -5.671875, | |
| "logits/rejected": -3.7265625, | |
| "logps/chosen": -80.25, | |
| "logps/rejected": -457.0, | |
| "loss": 0.0031, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.5234375, | |
| "rewards/margins": 23.875, | |
| "rewards/rejected": -20.375, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.21035058430717862, | |
| "grad_norm": 5.157471969141625e-05, | |
| "learning_rate": 0.00015865610073917825, | |
| "logits/chosen": -5.65625, | |
| "logits/rejected": -3.875, | |
| "logps/chosen": -96.5, | |
| "logps/rejected": -399.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.0703125, | |
| "rewards/margins": 23.125, | |
| "rewards/rejected": -19.0625, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.21368948247078465, | |
| "grad_norm": 0.001272167544811964, | |
| "learning_rate": 0.0001586040675895261, | |
| "logits/chosen": -5.03125, | |
| "logits/rejected": -3.828125, | |
| "logps/chosen": -126.75, | |
| "logps/rejected": -453.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1796875, | |
| "rewards/margins": 23.4375, | |
| "rewards/rejected": -20.3125, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.21702838063439064, | |
| "grad_norm": 0.10344758629798889, | |
| "learning_rate": 0.00015855105503039804, | |
| "logits/chosen": -5.0, | |
| "logits/rejected": -3.65625, | |
| "logps/chosen": -93.75, | |
| "logps/rejected": -463.0, | |
| "loss": 0.0029, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.921875, | |
| "rewards/margins": 24.0, | |
| "rewards/rejected": -20.0625, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.22036727879799667, | |
| "grad_norm": 0.000509591365698725, | |
| "learning_rate": 0.00015849706372233238, | |
| "logits/chosen": -5.390625, | |
| "logits/rejected": -3.7109375, | |
| "logps/chosen": -99.5, | |
| "logps/rejected": -421.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.34375, | |
| "rewards/margins": 22.625, | |
| "rewards/rejected": -19.3125, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.22370617696160267, | |
| "grad_norm": 0.0010495522292330861, | |
| "learning_rate": 0.0001584420943380628, | |
| "logits/chosen": -5.265625, | |
| "logits/rejected": -3.9375, | |
| "logps/chosen": -109.25, | |
| "logps/rejected": -458.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.3203125, | |
| "rewards/margins": 23.1875, | |
| "rewards/rejected": -19.875, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.2270450751252087, | |
| "grad_norm": 0.00022398516011890024, | |
| "learning_rate": 0.0001583861475625097, | |
| "logits/chosen": -5.296875, | |
| "logits/rejected": -3.6796875, | |
| "logps/chosen": -100.0, | |
| "logps/rejected": -489.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.6796875, | |
| "rewards/margins": 24.6875, | |
| "rewards/rejected": -21.0, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.2303839732888147, | |
| "grad_norm": 1.072521808964666e-05, | |
| "learning_rate": 0.00015832922409277198, | |
| "logits/chosen": -4.859375, | |
| "logits/rejected": -3.9765625, | |
| "logps/chosen": -124.75, | |
| "logps/rejected": -395.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.640625, | |
| "rewards/margins": 22.625, | |
| "rewards/rejected": -18.9375, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.2337228714524207, | |
| "grad_norm": 0.00017957530508283526, | |
| "learning_rate": 0.00015827132463811804, | |
| "logits/chosen": -5.09375, | |
| "logits/rejected": -3.828125, | |
| "logps/chosen": -106.5, | |
| "logps/rejected": -407.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.609375, | |
| "rewards/margins": 22.5, | |
| "rewards/rejected": -18.875, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.2370617696160267, | |
| "grad_norm": 0.0003923263284377754, | |
| "learning_rate": 0.00015821244991997717, | |
| "logits/chosen": -4.671875, | |
| "logits/rejected": -3.53125, | |
| "logps/chosen": -102.25, | |
| "logps/rejected": -459.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.9453125, | |
| "rewards/margins": 23.875, | |
| "rewards/rejected": -20.0, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.24040066777963273, | |
| "grad_norm": 1.9827170035568997e-05, | |
| "learning_rate": 0.0001581526006719304, | |
| "logits/chosen": -5.53125, | |
| "logits/rejected": -3.84375, | |
| "logps/chosen": -89.75, | |
| "logps/rejected": -465.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.28125, | |
| "rewards/margins": 24.75, | |
| "rewards/rejected": -20.4375, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.24373956594323873, | |
| "grad_norm": 0.2003210037946701, | |
| "learning_rate": 0.0001580917776397016, | |
| "logits/chosen": -4.875, | |
| "logits/rejected": -3.6875, | |
| "logps/chosen": -123.0, | |
| "logps/rejected": -445.0, | |
| "loss": 0.0063, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.0546875, | |
| "rewards/margins": 23.4375, | |
| "rewards/rejected": -19.3125, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.24707846410684475, | |
| "grad_norm": 4.490726860240102e-05, | |
| "learning_rate": 0.0001580299815811478, | |
| "logits/chosen": -5.046875, | |
| "logits/rejected": -3.6328125, | |
| "logps/chosen": -96.25, | |
| "logps/rejected": -463.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.6015625, | |
| "rewards/margins": 24.4375, | |
| "rewards/rejected": -20.8125, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.25041736227045075, | |
| "grad_norm": 0.0011267291847616434, | |
| "learning_rate": 0.00015796721326625013, | |
| "logits/chosen": -5.078125, | |
| "logits/rejected": -3.5625, | |
| "logps/chosen": -99.25, | |
| "logps/rejected": -478.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.8046875, | |
| "rewards/margins": 25.25, | |
| "rewards/rejected": -21.4375, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.25375626043405675, | |
| "grad_norm": 2.087617986035184e-06, | |
| "learning_rate": 0.00015790347347710405, | |
| "logits/chosen": -5.046875, | |
| "logits/rejected": -3.53125, | |
| "logps/chosen": -118.0, | |
| "logps/rejected": -464.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.9296875, | |
| "rewards/margins": 24.4375, | |
| "rewards/rejected": -20.5, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.2570951585976628, | |
| "grad_norm": 0.00048489755135960877, | |
| "learning_rate": 0.00015783876300790956, | |
| "logits/chosen": -5.078125, | |
| "logits/rejected": -3.6015625, | |
| "logps/chosen": -102.25, | |
| "logps/rejected": -466.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.3359375, | |
| "rewards/margins": 23.375, | |
| "rewards/rejected": -20.0, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.2604340567612688, | |
| "grad_norm": 5.250661706668325e-05, | |
| "learning_rate": 0.0001577730826649614, | |
| "logits/chosen": -4.984375, | |
| "logits/rejected": -3.484375, | |
| "logps/chosen": -123.25, | |
| "logps/rejected": -448.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.234375, | |
| "rewards/margins": 23.6875, | |
| "rewards/rejected": -19.5, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.2637729549248748, | |
| "grad_norm": 2.072748429782223e-05, | |
| "learning_rate": 0.00015770643326663898, | |
| "logits/chosen": -5.25, | |
| "logits/rejected": -3.359375, | |
| "logps/chosen": -80.25, | |
| "logps/rejected": -486.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.46875, | |
| "rewards/margins": 23.625, | |
| "rewards/rejected": -20.1875, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.2671118530884808, | |
| "grad_norm": 0.002645147731527686, | |
| "learning_rate": 0.0001576388156433962, | |
| "logits/chosen": -5.15625, | |
| "logits/rejected": -3.515625, | |
| "logps/chosen": -119.0, | |
| "logps/rejected": -456.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.765625, | |
| "rewards/margins": 24.0625, | |
| "rewards/rejected": -20.3125, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2704507512520868, | |
| "grad_norm": 1.2537796465039719e-05, | |
| "learning_rate": 0.00015757023063775106, | |
| "logits/chosen": -4.71875, | |
| "logits/rejected": -3.5078125, | |
| "logps/chosen": -119.25, | |
| "logps/rejected": -425.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.2421875, | |
| "rewards/margins": 23.25, | |
| "rewards/rejected": -19.0, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.27378964941569284, | |
| "grad_norm": 0.0001700354478089139, | |
| "learning_rate": 0.00015750067910427513, | |
| "logits/chosen": -5.125, | |
| "logits/rejected": -3.59375, | |
| "logps/chosen": -108.5, | |
| "logps/rejected": -414.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.7421875, | |
| "rewards/margins": 22.0625, | |
| "rewards/rejected": -18.3125, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.27712854757929883, | |
| "grad_norm": 5.510517439688556e-05, | |
| "learning_rate": 0.000157430161909583, | |
| "logits/chosen": -5.0, | |
| "logits/rejected": -3.40625, | |
| "logps/chosen": -102.0, | |
| "logps/rejected": -452.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.8828125, | |
| "rewards/margins": 22.875, | |
| "rewards/rejected": -19.0, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.28046744574290483, | |
| "grad_norm": 0.0010590353049337864, | |
| "learning_rate": 0.00015735867993232143, | |
| "logits/chosen": -5.078125, | |
| "logits/rejected": -3.5703125, | |
| "logps/chosen": -105.25, | |
| "logps/rejected": -399.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.2265625, | |
| "rewards/margins": 21.9375, | |
| "rewards/rejected": -17.6875, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.2838063439065108, | |
| "grad_norm": 9.394536027684808e-05, | |
| "learning_rate": 0.0001572862340631584, | |
| "logits/chosen": -4.984375, | |
| "logits/rejected": -3.6875, | |
| "logps/chosen": -90.75, | |
| "logps/rejected": -433.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.3125, | |
| "rewards/margins": 22.875, | |
| "rewards/rejected": -18.5625, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.2871452420701169, | |
| "grad_norm": 0.0001689967029960826, | |
| "learning_rate": 0.00015721282520477197, | |
| "logits/chosen": -4.828125, | |
| "logits/rejected": -3.65625, | |
| "logps/chosen": -95.5, | |
| "logps/rejected": -429.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.9375, | |
| "rewards/margins": 22.5, | |
| "rewards/rejected": -18.5625, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.2904841402337229, | |
| "grad_norm": 9.815259545575827e-06, | |
| "learning_rate": 0.00015713845427183922, | |
| "logits/chosen": -5.09375, | |
| "logits/rejected": -3.4140625, | |
| "logps/chosen": -87.5, | |
| "logps/rejected": -453.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.53125, | |
| "rewards/margins": 24.1875, | |
| "rewards/rejected": -19.625, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.2938230383973289, | |
| "grad_norm": 2.0043949916725978e-05, | |
| "learning_rate": 0.0001570631221910245, | |
| "logits/chosen": -4.703125, | |
| "logits/rejected": -3.5546875, | |
| "logps/chosen": -127.0, | |
| "logps/rejected": -440.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.265625, | |
| "rewards/margins": 23.9375, | |
| "rewards/rejected": -19.6875, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.29716193656093487, | |
| "grad_norm": 0.000917528523132205, | |
| "learning_rate": 0.00015698682990096828, | |
| "logits/chosen": -4.90625, | |
| "logits/rejected": -3.5625, | |
| "logps/chosen": -111.25, | |
| "logps/rejected": -377.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.546875, | |
| "rewards/margins": 22.25, | |
| "rewards/rejected": -17.6875, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.3005008347245409, | |
| "grad_norm": 8.072228229139e-05, | |
| "learning_rate": 0.00015690957835227522, | |
| "logits/chosen": -5.28125, | |
| "logits/rejected": -3.4765625, | |
| "logps/chosen": -79.25, | |
| "logps/rejected": -458.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.96875, | |
| "rewards/margins": 22.8125, | |
| "rewards/rejected": -18.8125, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.3038397328881469, | |
| "grad_norm": 4.4778818846680224e-05, | |
| "learning_rate": 0.00015683136850750236, | |
| "logits/chosen": -4.453125, | |
| "logits/rejected": -3.28125, | |
| "logps/chosen": -104.25, | |
| "logps/rejected": -458.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.4765625, | |
| "rewards/margins": 24.0625, | |
| "rewards/rejected": -20.5625, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.3071786310517529, | |
| "grad_norm": 5.669149322784506e-05, | |
| "learning_rate": 0.00015675220134114712, | |
| "logits/chosen": -4.796875, | |
| "logits/rejected": -3.4765625, | |
| "logps/chosen": -94.25, | |
| "logps/rejected": -428.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.1796875, | |
| "rewards/margins": 23.1875, | |
| "rewards/rejected": -19.0625, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.3105175292153589, | |
| "grad_norm": 7.771019227220677e-06, | |
| "learning_rate": 0.00015667207783963516, | |
| "logits/chosen": -5.09375, | |
| "logits/rejected": -3.359375, | |
| "logps/chosen": -102.25, | |
| "logps/rejected": -465.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.90625, | |
| "rewards/margins": 24.0625, | |
| "rewards/rejected": -20.125, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.31385642737896496, | |
| "grad_norm": 0.0015844438457861543, | |
| "learning_rate": 0.00015659099900130826, | |
| "logits/chosen": -4.9375, | |
| "logits/rejected": -3.265625, | |
| "logps/chosen": -97.25, | |
| "logps/rejected": -508.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.8046875, | |
| "rewards/margins": 25.125, | |
| "rewards/rejected": -21.375, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.31719532554257096, | |
| "grad_norm": 0.0008710987749509513, | |
| "learning_rate": 0.00015650896583641158, | |
| "logits/chosen": -5.140625, | |
| "logits/rejected": -3.3046875, | |
| "logps/chosen": -102.5, | |
| "logps/rejected": -449.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.6171875, | |
| "rewards/margins": 23.875, | |
| "rewards/rejected": -20.25, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.32053422370617696, | |
| "grad_norm": 2.450255806252244e-06, | |
| "learning_rate": 0.00015642597936708127, | |
| "logits/chosen": -4.6875, | |
| "logits/rejected": -3.3515625, | |
| "logps/chosen": -124.5, | |
| "logps/rejected": -473.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.375, | |
| "rewards/margins": 25.0, | |
| "rewards/rejected": -20.625, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.32387312186978295, | |
| "grad_norm": 4.694379822467454e-05, | |
| "learning_rate": 0.00015634204062733167, | |
| "logits/chosen": -5.015625, | |
| "logits/rejected": -3.59375, | |
| "logps/chosen": -103.75, | |
| "logps/rejected": -385.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.6875, | |
| "rewards/margins": 23.0, | |
| "rewards/rejected": -18.3125, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.327212020033389, | |
| "grad_norm": 0.0004240713897161186, | |
| "learning_rate": 0.00015625715066304246, | |
| "logits/chosen": -4.9375, | |
| "logits/rejected": -3.671875, | |
| "logps/chosen": -103.75, | |
| "logps/rejected": -385.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.875, | |
| "rewards/margins": 22.125, | |
| "rewards/rejected": -18.3125, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.330550918196995, | |
| "grad_norm": 0.864057183265686, | |
| "learning_rate": 0.00015617131053194565, | |
| "logits/chosen": -4.609375, | |
| "logits/rejected": -3.4453125, | |
| "logps/chosen": -128.0, | |
| "logps/rejected": -453.0, | |
| "loss": 0.0811, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 3.671875, | |
| "rewards/margins": 22.625, | |
| "rewards/rejected": -18.9375, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.333889816360601, | |
| "grad_norm": 0.0037979809567332268, | |
| "learning_rate": 0.0001560845213036123, | |
| "logits/chosen": -4.65625, | |
| "logits/rejected": -3.65625, | |
| "logps/chosen": -113.5, | |
| "logps/rejected": -393.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.890625, | |
| "rewards/margins": 24.0625, | |
| "rewards/rejected": -20.25, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.337228714524207, | |
| "grad_norm": 0.0002999906719196588, | |
| "learning_rate": 0.00015599678405943927, | |
| "logits/chosen": -5.171875, | |
| "logits/rejected": -3.7890625, | |
| "logps/chosen": -104.75, | |
| "logps/rejected": -471.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.7890625, | |
| "rewards/margins": 27.0625, | |
| "rewards/rejected": -23.25, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.34056761268781305, | |
| "grad_norm": 0.38996586203575134, | |
| "learning_rate": 0.00015590809989263576, | |
| "logits/chosen": -5.265625, | |
| "logits/rejected": -3.3359375, | |
| "logps/chosen": -95.25, | |
| "logps/rejected": -461.0, | |
| "loss": 0.0173, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 3.4140625, | |
| "rewards/margins": 24.8125, | |
| "rewards/rejected": -21.4375, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.34390651085141904, | |
| "grad_norm": 0.0001715045509627089, | |
| "learning_rate": 0.00015581846990820965, | |
| "logits/chosen": -5.375, | |
| "logits/rejected": -3.953125, | |
| "logps/chosen": -119.75, | |
| "logps/rejected": -469.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.8359375, | |
| "rewards/margins": 27.1875, | |
| "rewards/rejected": -23.3125, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.34724540901502504, | |
| "grad_norm": 0.0018466322217136621, | |
| "learning_rate": 0.00015572789522295372, | |
| "logits/chosen": -5.21875, | |
| "logits/rejected": -4.0390625, | |
| "logps/chosen": -117.25, | |
| "logps/rejected": -507.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.6015625, | |
| "rewards/margins": 29.3125, | |
| "rewards/rejected": -25.625, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.35058430717863104, | |
| "grad_norm": 1.7793492588680238e-05, | |
| "learning_rate": 0.00015563637696543173, | |
| "logits/chosen": -5.375, | |
| "logits/rejected": -3.765625, | |
| "logps/chosen": -122.5, | |
| "logps/rejected": -574.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1953125, | |
| "rewards/margins": 32.0, | |
| "rewards/rejected": -28.75, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.35392320534223703, | |
| "grad_norm": 0.0019883729983121157, | |
| "learning_rate": 0.00015554391627596446, | |
| "logits/chosen": -5.75, | |
| "logits/rejected": -3.84375, | |
| "logps/chosen": -74.75, | |
| "logps/rejected": -558.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.4296875, | |
| "rewards/margins": 30.75, | |
| "rewards/rejected": -27.25, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.3572621035058431, | |
| "grad_norm": 0.0002132615481968969, | |
| "learning_rate": 0.0001554505143066154, | |
| "logits/chosen": -5.515625, | |
| "logits/rejected": -3.8984375, | |
| "logps/chosen": -118.5, | |
| "logps/rejected": -496.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.265625, | |
| "rewards/margins": 30.0, | |
| "rewards/rejected": -25.75, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.3606010016694491, | |
| "grad_norm": 0.018147334456443787, | |
| "learning_rate": 0.0001553561722211764, | |
| "logits/chosen": -5.5625, | |
| "logits/rejected": -3.9375, | |
| "logps/chosen": -98.75, | |
| "logps/rejected": -535.0, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.8828125, | |
| "rewards/margins": 32.5, | |
| "rewards/rejected": -28.5625, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.3639398998330551, | |
| "grad_norm": 0.00014658304280601442, | |
| "learning_rate": 0.00015526089119515316, | |
| "logits/chosen": -5.796875, | |
| "logits/rejected": -3.875, | |
| "logps/chosen": -107.5, | |
| "logps/rejected": -476.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.1171875, | |
| "rewards/margins": 30.375, | |
| "rewards/rejected": -26.25, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.3672787979966611, | |
| "grad_norm": 0.5631054043769836, | |
| "learning_rate": 0.00015516467241575066, | |
| "logits/chosen": -5.46875, | |
| "logits/rejected": -3.9453125, | |
| "logps/chosen": -91.0, | |
| "logps/rejected": -548.0, | |
| "loss": 0.063, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 4.0703125, | |
| "rewards/margins": 32.875, | |
| "rewards/rejected": -28.75, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.37061769616026713, | |
| "grad_norm": 0.0015704554971307516, | |
| "learning_rate": 0.00015506751708185837, | |
| "logits/chosen": -5.359375, | |
| "logits/rejected": -3.9609375, | |
| "logps/chosen": -97.0, | |
| "logps/rejected": -463.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.515625, | |
| "rewards/margins": 29.625, | |
| "rewards/rejected": -25.125, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.3739565943238731, | |
| "grad_norm": 0.03998275473713875, | |
| "learning_rate": 0.00015496942640403515, | |
| "logits/chosen": -5.5625, | |
| "logits/rejected": -3.6484375, | |
| "logps/chosen": -90.0, | |
| "logps/rejected": -502.0, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.375, | |
| "rewards/margins": 30.125, | |
| "rewards/rejected": -26.6875, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.3772954924874791, | |
| "grad_norm": 1.5272264136001468e-05, | |
| "learning_rate": 0.00015487040160449433, | |
| "logits/chosen": -5.21875, | |
| "logits/rejected": -3.859375, | |
| "logps/chosen": -108.0, | |
| "logps/rejected": -489.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.109375, | |
| "rewards/margins": 29.0625, | |
| "rewards/rejected": -24.9375, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.3806343906510851, | |
| "grad_norm": 2.4422410206170753e-05, | |
| "learning_rate": 0.00015477044391708848, | |
| "logits/chosen": -5.28125, | |
| "logits/rejected": -3.7890625, | |
| "logps/chosen": -119.0, | |
| "logps/rejected": -501.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.375, | |
| "rewards/margins": 30.1875, | |
| "rewards/rejected": -25.8125, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.38397328881469117, | |
| "grad_norm": 0.009292054921388626, | |
| "learning_rate": 0.00015466955458729386, | |
| "logits/chosen": -5.640625, | |
| "logits/rejected": -3.6484375, | |
| "logps/chosen": -84.75, | |
| "logps/rejected": -505.0, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.1171875, | |
| "rewards/margins": 29.5, | |
| "rewards/rejected": -25.3125, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.38731218697829717, | |
| "grad_norm": 0.016696617007255554, | |
| "learning_rate": 0.00015456773487219517, | |
| "logits/chosen": -5.109375, | |
| "logits/rejected": -3.640625, | |
| "logps/chosen": -92.5, | |
| "logps/rejected": -484.0, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.28125, | |
| "rewards/margins": 29.375, | |
| "rewards/rejected": -25.0625, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.39065108514190316, | |
| "grad_norm": 3.30902221321594e-05, | |
| "learning_rate": 0.00015446498604046967, | |
| "logits/chosen": -4.9375, | |
| "logits/rejected": -3.3359375, | |
| "logps/chosen": -110.25, | |
| "logps/rejected": -536.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.453125, | |
| "rewards/margins": 29.875, | |
| "rewards/rejected": -25.5, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.39398998330550916, | |
| "grad_norm": 0.007862205617129803, | |
| "learning_rate": 0.00015436130937237144, | |
| "logits/chosen": -4.28125, | |
| "logits/rejected": -3.265625, | |
| "logps/chosen": -102.5, | |
| "logps/rejected": -476.0, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.703125, | |
| "rewards/margins": 29.0, | |
| "rewards/rejected": -24.25, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.3973288814691152, | |
| "grad_norm": 0.00031752747599966824, | |
| "learning_rate": 0.00015425670615971544, | |
| "logits/chosen": -4.84375, | |
| "logits/rejected": -3.0859375, | |
| "logps/chosen": -105.75, | |
| "logps/rejected": -473.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.78125, | |
| "rewards/margins": 29.6875, | |
| "rewards/rejected": -24.9375, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.4006677796327212, | |
| "grad_norm": 3.322392512927763e-05, | |
| "learning_rate": 0.00015415117770586144, | |
| "logits/chosen": -5.234375, | |
| "logits/rejected": -3.1875, | |
| "logps/chosen": -75.25, | |
| "logps/rejected": -491.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.9375, | |
| "rewards/margins": 29.8125, | |
| "rewards/rejected": -24.8125, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.4040066777963272, | |
| "grad_norm": 0.00014395274047274143, | |
| "learning_rate": 0.00015404472532569771, | |
| "logits/chosen": -5.078125, | |
| "logits/rejected": -3.109375, | |
| "logps/chosen": -107.0, | |
| "logps/rejected": -468.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.90625, | |
| "rewards/margins": 28.4375, | |
| "rewards/rejected": -23.5625, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.4073455759599332, | |
| "grad_norm": 3.165722773701418e-06, | |
| "learning_rate": 0.0001539373503456247, | |
| "logits/chosen": -4.765625, | |
| "logits/rejected": -3.125, | |
| "logps/chosen": -110.25, | |
| "logps/rejected": -497.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.875, | |
| "rewards/margins": 28.125, | |
| "rewards/rejected": -23.1875, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.41068447412353926, | |
| "grad_norm": 0.05887475982308388, | |
| "learning_rate": 0.00015382905410353846, | |
| "logits/chosen": -4.84375, | |
| "logits/rejected": -3.375, | |
| "logps/chosen": -121.0, | |
| "logps/rejected": -435.0, | |
| "loss": 0.0035, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.03125, | |
| "rewards/margins": 24.9375, | |
| "rewards/rejected": -20.9375, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.41402337228714525, | |
| "grad_norm": 0.0702575147151947, | |
| "learning_rate": 0.00015371983794881404, | |
| "logits/chosen": -4.890625, | |
| "logits/rejected": -3.0546875, | |
| "logps/chosen": -69.25, | |
| "logps/rejected": -460.0, | |
| "loss": 0.0017, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.84375, | |
| "rewards/margins": 26.8125, | |
| "rewards/rejected": -23.0, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.41736227045075125, | |
| "grad_norm": 0.01235484890639782, | |
| "learning_rate": 0.0001536097032422886, | |
| "logits/chosen": -4.9375, | |
| "logits/rejected": -2.921875, | |
| "logps/chosen": -102.5, | |
| "logps/rejected": -492.0, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.59375, | |
| "rewards/margins": 27.1875, | |
| "rewards/rejected": -22.625, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.42070116861435725, | |
| "grad_norm": 0.0033392056357115507, | |
| "learning_rate": 0.00015349865135624448, | |
| "logits/chosen": -5.078125, | |
| "logits/rejected": -2.8125, | |
| "logps/chosen": -93.5, | |
| "logps/rejected": -439.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.34375, | |
| "rewards/margins": 25.375, | |
| "rewards/rejected": -21.0, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.4240400667779633, | |
| "grad_norm": 0.0017836507176980376, | |
| "learning_rate": 0.0001533866836743922, | |
| "logits/chosen": -5.21875, | |
| "logits/rejected": -3.15625, | |
| "logps/chosen": -95.25, | |
| "logps/rejected": -440.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.171875, | |
| "rewards/margins": 27.5, | |
| "rewards/rejected": -22.3125, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.4273789649415693, | |
| "grad_norm": 1.02886324384599e-05, | |
| "learning_rate": 0.00015327380159185295, | |
| "logits/chosen": -4.8125, | |
| "logits/rejected": -3.0, | |
| "logps/chosen": -98.0, | |
| "logps/rejected": -496.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.90625, | |
| "rewards/margins": 28.75, | |
| "rewards/rejected": -23.875, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.4307178631051753, | |
| "grad_norm": 0.0003149933472741395, | |
| "learning_rate": 0.00015316000651514157, | |
| "logits/chosen": -5.140625, | |
| "logits/rejected": -2.9765625, | |
| "logps/chosen": -82.25, | |
| "logps/rejected": -471.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.78125, | |
| "rewards/margins": 27.5625, | |
| "rewards/rejected": -22.75, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.4340567612687813, | |
| "grad_norm": 5.283300197334029e-05, | |
| "learning_rate": 0.0001530452998621487, | |
| "logits/chosen": -4.8125, | |
| "logits/rejected": -3.0390625, | |
| "logps/chosen": -112.0, | |
| "logps/rejected": -428.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.859375, | |
| "rewards/margins": 26.0, | |
| "rewards/rejected": -21.125, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.4373956594323873, | |
| "grad_norm": 7.664081931579858e-05, | |
| "learning_rate": 0.00015292968306212336, | |
| "logits/chosen": -4.453125, | |
| "logits/rejected": -2.9375, | |
| "logps/chosen": -101.5, | |
| "logps/rejected": -450.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.203125, | |
| "rewards/margins": 26.875, | |
| "rewards/rejected": -21.6875, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.44073455759599334, | |
| "grad_norm": 0.00020385748939588666, | |
| "learning_rate": 0.00015281315755565498, | |
| "logits/chosen": -4.765625, | |
| "logits/rejected": -3.1015625, | |
| "logps/chosen": -95.25, | |
| "logps/rejected": -448.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.859375, | |
| "rewards/margins": 27.125, | |
| "rewards/rejected": -22.25, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.44407345575959933, | |
| "grad_norm": 0.000807323376648128, | |
| "learning_rate": 0.0001526957247946555, | |
| "logits/chosen": -5.078125, | |
| "logits/rejected": -2.75, | |
| "logps/chosen": -98.75, | |
| "logps/rejected": -494.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.765625, | |
| "rewards/margins": 28.4375, | |
| "rewards/rejected": -23.6875, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.44741235392320533, | |
| "grad_norm": 0.0005043560522608459, | |
| "learning_rate": 0.0001525773862423413, | |
| "logits/chosen": -4.984375, | |
| "logits/rejected": -2.921875, | |
| "logps/chosen": -100.0, | |
| "logps/rejected": -462.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.578125, | |
| "rewards/margins": 27.375, | |
| "rewards/rejected": -22.8125, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.4507512520868113, | |
| "grad_norm": 0.0002074016520055011, | |
| "learning_rate": 0.00015245814337321492, | |
| "logits/chosen": -4.8125, | |
| "logits/rejected": -2.71875, | |
| "logps/chosen": -106.0, | |
| "logps/rejected": -532.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.640625, | |
| "rewards/margins": 29.4375, | |
| "rewards/rejected": -24.8125, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.4540901502504174, | |
| "grad_norm": 8.115387754514813e-05, | |
| "learning_rate": 0.0001523379976730468, | |
| "logits/chosen": -4.90625, | |
| "logits/rejected": -2.90625, | |
| "logps/chosen": -88.5, | |
| "logps/rejected": -502.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.8125, | |
| "rewards/margins": 28.5, | |
| "rewards/rejected": -23.6875, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.4574290484140234, | |
| "grad_norm": 3.2275711419060826e-06, | |
| "learning_rate": 0.00015221695063885664, | |
| "logits/chosen": -4.75, | |
| "logits/rejected": -2.8046875, | |
| "logps/chosen": -85.0, | |
| "logps/rejected": -494.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.03125, | |
| "rewards/margins": 29.1875, | |
| "rewards/rejected": -24.1875, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.4607679465776294, | |
| "grad_norm": 0.0006476023118011653, | |
| "learning_rate": 0.00015209500377889472, | |
| "logits/chosen": -4.6875, | |
| "logits/rejected": -2.8359375, | |
| "logps/chosen": -107.75, | |
| "logps/rejected": -498.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.671875, | |
| "rewards/margins": 28.5, | |
| "rewards/rejected": -23.8125, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.46410684474123537, | |
| "grad_norm": 4.4746982894139364e-05, | |
| "learning_rate": 0.00015197215861262325, | |
| "logits/chosen": -4.796875, | |
| "logits/rejected": -2.65625, | |
| "logps/chosen": -102.0, | |
| "logps/rejected": -566.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.140625, | |
| "rewards/margins": 30.875, | |
| "rewards/rejected": -25.75, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.4674457429048414, | |
| "grad_norm": 0.05361659452319145, | |
| "learning_rate": 0.00015184841667069748, | |
| "logits/chosen": -4.796875, | |
| "logits/rejected": -2.9609375, | |
| "logps/chosen": -98.75, | |
| "logps/rejected": -447.0, | |
| "loss": 0.0012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.78125, | |
| "rewards/margins": 28.0625, | |
| "rewards/rejected": -23.25, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.4707846410684474, | |
| "grad_norm": 0.00023327009694185108, | |
| "learning_rate": 0.0001517237794949463, | |
| "logits/chosen": -4.609375, | |
| "logits/rejected": -3.03125, | |
| "logps/chosen": -103.5, | |
| "logps/rejected": -476.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.4453125, | |
| "rewards/margins": 28.5625, | |
| "rewards/rejected": -24.125, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.4741235392320534, | |
| "grad_norm": 0.2315979301929474, | |
| "learning_rate": 0.00015159824863835336, | |
| "logits/chosen": -5.484375, | |
| "logits/rejected": -2.578125, | |
| "logps/chosen": -76.5, | |
| "logps/rejected": -536.0, | |
| "loss": 0.0126, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 3.59375, | |
| "rewards/margins": 29.0, | |
| "rewards/rejected": -25.4375, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.4774624373956594, | |
| "grad_norm": 0.1798364669084549, | |
| "learning_rate": 0.00015147182566503764, | |
| "logits/chosen": -5.078125, | |
| "logits/rejected": -2.9296875, | |
| "logps/chosen": -101.25, | |
| "logps/rejected": -478.0, | |
| "loss": 0.0058, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.078125, | |
| "rewards/margins": 28.4375, | |
| "rewards/rejected": -23.4375, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.48080133555926546, | |
| "grad_norm": 2.0006180420750752e-05, | |
| "learning_rate": 0.00015134451215023385, | |
| "logits/chosen": -4.34375, | |
| "logits/rejected": -2.96875, | |
| "logps/chosen": -103.0, | |
| "logps/rejected": -493.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.109375, | |
| "rewards/margins": 29.0, | |
| "rewards/rejected": -23.875, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.48414023372287146, | |
| "grad_norm": 0.0023838214110583067, | |
| "learning_rate": 0.0001512163096802729, | |
| "logits/chosen": -4.96875, | |
| "logits/rejected": -2.828125, | |
| "logps/chosen": -107.25, | |
| "logps/rejected": -492.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.078125, | |
| "rewards/margins": 28.0625, | |
| "rewards/rejected": -23.9375, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.48747913188647746, | |
| "grad_norm": 0.006409293040633202, | |
| "learning_rate": 0.00015108721985256215, | |
| "logits/chosen": -4.765625, | |
| "logits/rejected": -2.734375, | |
| "logps/chosen": -79.0, | |
| "logps/rejected": -548.0, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.625, | |
| "rewards/margins": 31.6875, | |
| "rewards/rejected": -27.0, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.49081803005008345, | |
| "grad_norm": 0.00015124543278943747, | |
| "learning_rate": 0.00015095724427556544, | |
| "logits/chosen": -4.84375, | |
| "logits/rejected": -2.8515625, | |
| "logps/chosen": -102.25, | |
| "logps/rejected": -460.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.859375, | |
| "rewards/margins": 27.0625, | |
| "rewards/rejected": -22.1875, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.4941569282136895, | |
| "grad_norm": 0.00012339219392742962, | |
| "learning_rate": 0.00015082638456878308, | |
| "logits/chosen": -4.75, | |
| "logits/rejected": -2.9921875, | |
| "logps/chosen": -89.0, | |
| "logps/rejected": -481.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.2265625, | |
| "rewards/margins": 28.9375, | |
| "rewards/rejected": -24.6875, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.4974958263772955, | |
| "grad_norm": 1.230505313287722e-05, | |
| "learning_rate": 0.0001506946423627316, | |
| "logits/chosen": -5.0, | |
| "logits/rejected": -3.1171875, | |
| "logps/chosen": -82.5, | |
| "logps/rejected": -489.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.59375, | |
| "rewards/margins": 29.125, | |
| "rewards/rejected": -24.5, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.5008347245409015, | |
| "grad_norm": 2.1004785594413988e-05, | |
| "learning_rate": 0.00015056201929892368, | |
| "logits/chosen": -4.25, | |
| "logits/rejected": -2.9765625, | |
| "logps/chosen": -112.25, | |
| "logps/rejected": -434.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.171875, | |
| "rewards/margins": 27.0625, | |
| "rewards/rejected": -22.875, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.5041736227045075, | |
| "grad_norm": 0.00019315003009978682, | |
| "learning_rate": 0.00015042851702984732, | |
| "logits/chosen": -4.84375, | |
| "logits/rejected": -2.875, | |
| "logps/chosen": -108.75, | |
| "logps/rejected": -448.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.625, | |
| "rewards/margins": 27.5625, | |
| "rewards/rejected": -22.9375, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.5075125208681135, | |
| "grad_norm": 0.003308866871520877, | |
| "learning_rate": 0.00015029413721894558, | |
| "logits/chosen": -4.75, | |
| "logits/rejected": -2.984375, | |
| "logps/chosen": -122.5, | |
| "logps/rejected": -488.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.609375, | |
| "rewards/margins": 28.625, | |
| "rewards/rejected": -24.0625, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.5108514190317195, | |
| "grad_norm": 5.397659606387606e-06, | |
| "learning_rate": 0.00015015888154059568, | |
| "logits/chosen": -4.53125, | |
| "logits/rejected": -3.1640625, | |
| "logps/chosen": -101.5, | |
| "logps/rejected": -505.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.75, | |
| "rewards/margins": 29.3125, | |
| "rewards/rejected": -24.5625, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.5141903171953256, | |
| "grad_norm": 0.10684552043676376, | |
| "learning_rate": 0.00015002275168008816, | |
| "logits/chosen": -5.46875, | |
| "logits/rejected": -3.015625, | |
| "logps/chosen": -71.75, | |
| "logps/rejected": -500.0, | |
| "loss": 0.005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.2578125, | |
| "rewards/margins": 29.4375, | |
| "rewards/rejected": -25.125, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.5175292153589316, | |
| "grad_norm": 3.8425196180469356e-07, | |
| "learning_rate": 0.00014988574933360593, | |
| "logits/chosen": -4.71875, | |
| "logits/rejected": -2.9375, | |
| "logps/chosen": -101.25, | |
| "logps/rejected": -507.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.765625, | |
| "rewards/margins": 29.75, | |
| "rewards/rejected": -25.0, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.5208681135225376, | |
| "grad_norm": 7.517022822867148e-06, | |
| "learning_rate": 0.0001497478762082031, | |
| "logits/chosen": -5.125, | |
| "logits/rejected": -2.609375, | |
| "logps/chosen": -96.75, | |
| "logps/rejected": -532.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.515625, | |
| "rewards/margins": 32.0, | |
| "rewards/rejected": -27.4375, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.5242070116861436, | |
| "grad_norm": 0.00013846807996742427, | |
| "learning_rate": 0.00014960913402178373, | |
| "logits/chosen": -5.328125, | |
| "logits/rejected": -2.953125, | |
| "logps/chosen": -86.25, | |
| "logps/rejected": -520.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.015625, | |
| "rewards/margins": 30.5625, | |
| "rewards/rejected": -25.5625, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.5275459098497496, | |
| "grad_norm": 3.7317280657589436e-05, | |
| "learning_rate": 0.00014946952450308035, | |
| "logits/chosen": -5.03125, | |
| "logits/rejected": -3.0859375, | |
| "logps/chosen": -83.5, | |
| "logps/rejected": -505.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.546875, | |
| "rewards/margins": 30.625, | |
| "rewards/rejected": -26.0625, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.5308848080133556, | |
| "grad_norm": 1.3853728887625039e-05, | |
| "learning_rate": 0.00014932904939163257, | |
| "logits/chosen": -5.125, | |
| "logits/rejected": -2.8359375, | |
| "logps/chosen": -76.5, | |
| "logps/rejected": -490.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.859375, | |
| "rewards/margins": 29.8125, | |
| "rewards/rejected": -25.0625, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.5342237061769616, | |
| "grad_norm": 6.569054676219821e-06, | |
| "learning_rate": 0.00014918771043776524, | |
| "logits/chosen": -4.734375, | |
| "logits/rejected": -3.0859375, | |
| "logps/chosen": -116.75, | |
| "logps/rejected": -504.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.453125, | |
| "rewards/margins": 29.75, | |
| "rewards/rejected": -25.25, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.5375626043405676, | |
| "grad_norm": 0.018823888152837753, | |
| "learning_rate": 0.00014904550940256675, | |
| "logits/chosen": -4.6875, | |
| "logits/rejected": -2.8203125, | |
| "logps/chosen": -103.75, | |
| "logps/rejected": -521.0, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.953125, | |
| "rewards/margins": 30.125, | |
| "rewards/rejected": -25.1875, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.5409015025041736, | |
| "grad_norm": 0.01165434904396534, | |
| "learning_rate": 0.00014890244805786706, | |
| "logits/chosen": -5.03125, | |
| "logits/rejected": -2.9921875, | |
| "logps/chosen": -100.25, | |
| "logps/rejected": -465.0, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.46875, | |
| "rewards/margins": 29.375, | |
| "rewards/rejected": -24.875, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.5442404006677797, | |
| "grad_norm": 2.0660480004153214e-05, | |
| "learning_rate": 0.00014875852818621563, | |
| "logits/chosen": -4.6875, | |
| "logits/rejected": -2.8671875, | |
| "logps/chosen": -112.0, | |
| "logps/rejected": -467.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.6875, | |
| "rewards/margins": 29.5625, | |
| "rewards/rejected": -24.8125, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.5475792988313857, | |
| "grad_norm": 0.002862096531316638, | |
| "learning_rate": 0.00014861375158085915, | |
| "logits/chosen": -4.46875, | |
| "logits/rejected": -2.9921875, | |
| "logps/chosen": -110.75, | |
| "logps/rejected": -487.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.703125, | |
| "rewards/margins": 29.5625, | |
| "rewards/rejected": -24.8125, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.5509181969949917, | |
| "grad_norm": 0.0027916012331843376, | |
| "learning_rate": 0.00014846812004571928, | |
| "logits/chosen": -4.953125, | |
| "logits/rejected": -3.1015625, | |
| "logps/chosen": -92.25, | |
| "logps/rejected": -482.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.234375, | |
| "rewards/margins": 29.375, | |
| "rewards/rejected": -25.125, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.5542570951585977, | |
| "grad_norm": 0.00024799967650324106, | |
| "learning_rate": 0.0001483216353953701, | |
| "logits/chosen": -4.71875, | |
| "logits/rejected": -2.9296875, | |
| "logps/chosen": -108.0, | |
| "logps/rejected": -451.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.6875, | |
| "rewards/margins": 28.5625, | |
| "rewards/rejected": -23.9375, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.5575959933222037, | |
| "grad_norm": 0.0007697382825426757, | |
| "learning_rate": 0.00014817429945501563, | |
| "logits/chosen": -4.578125, | |
| "logits/rejected": -2.875, | |
| "logps/chosen": -109.5, | |
| "logps/rejected": -505.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.0859375, | |
| "rewards/margins": 29.0, | |
| "rewards/rejected": -24.875, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.5609348914858097, | |
| "grad_norm": 3.113675120403059e-05, | |
| "learning_rate": 0.00014802611406046685, | |
| "logits/chosen": -4.953125, | |
| "logits/rejected": -3.140625, | |
| "logps/chosen": -89.5, | |
| "logps/rejected": -470.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.8125, | |
| "rewards/margins": 29.25, | |
| "rewards/rejected": -24.5, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.5642737896494157, | |
| "grad_norm": 0.0006815637461841106, | |
| "learning_rate": 0.00014787708105811905, | |
| "logits/chosen": -4.984375, | |
| "logits/rejected": -2.984375, | |
| "logps/chosen": -88.375, | |
| "logps/rejected": -471.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.859375, | |
| "rewards/margins": 29.375, | |
| "rewards/rejected": -24.5, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.5676126878130217, | |
| "grad_norm": 0.00014252612891141325, | |
| "learning_rate": 0.00014772720230492878, | |
| "logits/chosen": -5.15625, | |
| "logits/rejected": -2.8203125, | |
| "logps/chosen": -103.25, | |
| "logps/rejected": -525.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.703125, | |
| "rewards/margins": 30.3125, | |
| "rewards/rejected": -25.625, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.5709515859766278, | |
| "grad_norm": 2.218190456915181e-05, | |
| "learning_rate": 0.00014757647966839058, | |
| "logits/chosen": -5.0, | |
| "logits/rejected": -2.8359375, | |
| "logps/chosen": -81.875, | |
| "logps/rejected": -514.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.15625, | |
| "rewards/margins": 30.375, | |
| "rewards/rejected": -26.25, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.5742904841402338, | |
| "grad_norm": 0.00024572337861172855, | |
| "learning_rate": 0.0001474249150265139, | |
| "logits/chosen": -4.921875, | |
| "logits/rejected": -2.921875, | |
| "logps/chosen": -77.125, | |
| "logps/rejected": -491.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.734375, | |
| "rewards/margins": 29.8125, | |
| "rewards/rejected": -25.0625, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.5776293823038398, | |
| "grad_norm": 0.062237586826086044, | |
| "learning_rate": 0.00014727251026779953, | |
| "logits/chosen": -4.921875, | |
| "logits/rejected": -3.0625, | |
| "logps/chosen": -104.25, | |
| "logps/rejected": -480.0, | |
| "loss": 0.0022, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.84375, | |
| "rewards/margins": 28.3125, | |
| "rewards/rejected": -24.5, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.5809682804674458, | |
| "grad_norm": 0.010632738471031189, | |
| "learning_rate": 0.0001471192672912162, | |
| "logits/chosen": -5.0, | |
| "logits/rejected": -2.859375, | |
| "logps/chosen": -91.0, | |
| "logps/rejected": -540.0, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.53125, | |
| "rewards/margins": 31.75, | |
| "rewards/rejected": -27.25, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.5843071786310517, | |
| "grad_norm": 0.004395844414830208, | |
| "learning_rate": 0.00014696518800617686, | |
| "logits/chosen": -4.796875, | |
| "logits/rejected": -3.109375, | |
| "logps/chosen": -100.25, | |
| "logps/rejected": -480.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.703125, | |
| "rewards/margins": 27.8125, | |
| "rewards/rejected": -24.0625, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.5876460767946577, | |
| "grad_norm": 0.0008318977779708803, | |
| "learning_rate": 0.00014681027433251486, | |
| "logits/chosen": -4.546875, | |
| "logits/rejected": -3.078125, | |
| "logps/chosen": -108.25, | |
| "logps/rejected": -473.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.765625, | |
| "rewards/margins": 29.4375, | |
| "rewards/rejected": -24.625, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.5909849749582637, | |
| "grad_norm": 0.47385621070861816, | |
| "learning_rate": 0.00014665452820046006, | |
| "logits/chosen": -4.71875, | |
| "logits/rejected": -3.015625, | |
| "logps/chosen": -92.25, | |
| "logps/rejected": -467.0, | |
| "loss": 0.0391, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 4.125, | |
| "rewards/margins": 27.625, | |
| "rewards/rejected": -23.5625, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.5943238731218697, | |
| "grad_norm": 2.916028734034626e-07, | |
| "learning_rate": 0.00014649795155061485, | |
| "logits/chosen": -4.921875, | |
| "logits/rejected": -2.828125, | |
| "logps/chosen": -86.25, | |
| "logps/rejected": -518.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.65625, | |
| "rewards/margins": 30.625, | |
| "rewards/rejected": -26.0, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.5976627712854758, | |
| "grad_norm": 0.06330767273902893, | |
| "learning_rate": 0.00014634054633392982, | |
| "logits/chosen": -4.5, | |
| "logits/rejected": -2.75, | |
| "logps/chosen": -98.25, | |
| "logps/rejected": -497.0, | |
| "loss": 0.0013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.875, | |
| "rewards/margins": 29.6875, | |
| "rewards/rejected": -24.875, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.6010016694490818, | |
| "grad_norm": 0.0008164051687344909, | |
| "learning_rate": 0.00014618231451167955, | |
| "logits/chosen": -4.640625, | |
| "logits/rejected": -2.65625, | |
| "logps/chosen": -94.75, | |
| "logps/rejected": -432.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.46875, | |
| "rewards/margins": 26.5625, | |
| "rewards/rejected": -21.0625, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.6043405676126878, | |
| "grad_norm": 6.780491821700707e-05, | |
| "learning_rate": 0.00014602325805543822, | |
| "logits/chosen": -4.265625, | |
| "logits/rejected": -2.84375, | |
| "logps/chosen": -97.25, | |
| "logps/rejected": -453.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.875, | |
| "rewards/margins": 27.5, | |
| "rewards/rejected": -22.5625, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.6076794657762938, | |
| "grad_norm": 4.8602585593471304e-05, | |
| "learning_rate": 0.00014586337894705487, | |
| "logits/chosen": -4.1875, | |
| "logits/rejected": -2.78125, | |
| "logps/chosen": -94.0, | |
| "logps/rejected": -409.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.140625, | |
| "rewards/margins": 26.125, | |
| "rewards/rejected": -21.0, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.6110183639398998, | |
| "grad_norm": 0.0002642914478201419, | |
| "learning_rate": 0.00014570267917862891, | |
| "logits/chosen": -4.421875, | |
| "logits/rejected": -2.703125, | |
| "logps/chosen": -99.25, | |
| "logps/rejected": -434.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.171875, | |
| "rewards/margins": 24.4375, | |
| "rewards/rejected": -19.25, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.6143572621035058, | |
| "grad_norm": 0.0028488298412412405, | |
| "learning_rate": 0.00014554116075248514, | |
| "logits/chosen": -4.171875, | |
| "logits/rejected": -2.6171875, | |
| "logps/chosen": -100.25, | |
| "logps/rejected": -429.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.015625, | |
| "rewards/margins": 25.125, | |
| "rewards/rejected": -20.125, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.6176961602671118, | |
| "grad_norm": 0.00043352670036256313, | |
| "learning_rate": 0.0001453788256811489, | |
| "logits/chosen": -4.078125, | |
| "logits/rejected": -2.7578125, | |
| "logps/chosen": -100.25, | |
| "logps/rejected": -406.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.03125, | |
| "rewards/margins": 24.6875, | |
| "rewards/rejected": -18.6875, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.6210350584307178, | |
| "grad_norm": 0.0006578704342246056, | |
| "learning_rate": 0.00014521567598732097, | |
| "logits/chosen": -4.2421875, | |
| "logits/rejected": -2.515625, | |
| "logps/chosen": -83.0, | |
| "logps/rejected": -400.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.6875, | |
| "rewards/margins": 23.375, | |
| "rewards/rejected": -17.6875, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.6243739565943238, | |
| "grad_norm": 0.0001582528348080814, | |
| "learning_rate": 0.00014505171370385233, | |
| "logits/chosen": -4.2265625, | |
| "logits/rejected": -2.5546875, | |
| "logps/chosen": -87.25, | |
| "logps/rejected": -462.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.125, | |
| "rewards/margins": 26.0625, | |
| "rewards/rejected": -19.9375, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.6277128547579299, | |
| "grad_norm": 2.958109871542547e-05, | |
| "learning_rate": 0.00014488694087371883, | |
| "logits/chosen": -4.234375, | |
| "logits/rejected": -2.1796875, | |
| "logps/chosen": -85.75, | |
| "logps/rejected": -447.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.078125, | |
| "rewards/margins": 25.5625, | |
| "rewards/rejected": -19.5, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.6310517529215359, | |
| "grad_norm": 0.0010354547994211316, | |
| "learning_rate": 0.00014472135954999581, | |
| "logits/chosen": -4.0, | |
| "logits/rejected": -2.5546875, | |
| "logps/chosen": -90.75, | |
| "logps/rejected": -452.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.515625, | |
| "rewards/margins": 25.4375, | |
| "rewards/rejected": -19.9375, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.6343906510851419, | |
| "grad_norm": 0.0017567307222634554, | |
| "learning_rate": 0.00014455497179583244, | |
| "logits/chosen": -4.3125, | |
| "logits/rejected": -2.5546875, | |
| "logps/chosen": -79.0, | |
| "logps/rejected": -453.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.09375, | |
| "rewards/margins": 23.875, | |
| "rewards/rejected": -18.8125, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.6377295492487479, | |
| "grad_norm": 1.4471517715719528e-05, | |
| "learning_rate": 0.00014438777968442607, | |
| "logits/chosen": -3.8671875, | |
| "logits/rejected": -2.5390625, | |
| "logps/chosen": -99.75, | |
| "logps/rejected": -490.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.640625, | |
| "rewards/margins": 25.8125, | |
| "rewards/rejected": -20.1875, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.6410684474123539, | |
| "grad_norm": 1.1293011993984692e-05, | |
| "learning_rate": 0.00014421978529899633, | |
| "logits/chosen": -3.8046875, | |
| "logits/rejected": -2.4296875, | |
| "logps/chosen": -70.375, | |
| "logps/rejected": -495.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.09375, | |
| "rewards/margins": 26.25, | |
| "rewards/rejected": -21.1875, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.6444073455759599, | |
| "grad_norm": 0.031924691051244736, | |
| "learning_rate": 0.00014405099073275924, | |
| "logits/chosen": -3.953125, | |
| "logits/rejected": -2.3359375, | |
| "logps/chosen": -111.0, | |
| "logps/rejected": -445.0, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.71875, | |
| "rewards/margins": 24.75, | |
| "rewards/rejected": -19.0625, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.6477462437395659, | |
| "grad_norm": 8.30372482596431e-06, | |
| "learning_rate": 0.00014388139808890112, | |
| "logits/chosen": -3.796875, | |
| "logits/rejected": -2.421875, | |
| "logps/chosen": -108.75, | |
| "logps/rejected": -434.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.234375, | |
| "rewards/margins": 24.5625, | |
| "rewards/rejected": -18.375, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.6510851419031719, | |
| "grad_norm": 0.016556670889258385, | |
| "learning_rate": 0.00014371100948055226, | |
| "logits/chosen": -4.296875, | |
| "logits/rejected": -2.4921875, | |
| "logps/chosen": -88.5, | |
| "logps/rejected": -417.0, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.015625, | |
| "rewards/margins": 23.25, | |
| "rewards/rejected": -18.25, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.654424040066778, | |
| "grad_norm": 0.0003383951261639595, | |
| "learning_rate": 0.0001435398270307609, | |
| "logits/chosen": -3.8203125, | |
| "logits/rejected": -2.5625, | |
| "logps/chosen": -96.25, | |
| "logps/rejected": -386.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.03125, | |
| "rewards/margins": 23.625, | |
| "rewards/rejected": -17.625, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.657762938230384, | |
| "grad_norm": 0.00017920513346325606, | |
| "learning_rate": 0.00014336785287246632, | |
| "logits/chosen": -4.0625, | |
| "logits/rejected": -2.4140625, | |
| "logps/chosen": -78.75, | |
| "logps/rejected": -421.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.890625, | |
| "rewards/margins": 24.0, | |
| "rewards/rejected": -18.125, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.66110183639399, | |
| "grad_norm": 0.0014842700911685824, | |
| "learning_rate": 0.00014319508914847274, | |
| "logits/chosen": -3.640625, | |
| "logits/rejected": -2.4765625, | |
| "logps/chosen": -107.5, | |
| "logps/rejected": -433.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.578125, | |
| "rewards/margins": 23.0625, | |
| "rewards/rejected": -17.4375, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.664440734557596, | |
| "grad_norm": 0.0002902350970543921, | |
| "learning_rate": 0.00014302153801142226, | |
| "logits/chosen": -3.671875, | |
| "logits/rejected": -2.359375, | |
| "logps/chosen": -105.25, | |
| "logps/rejected": -386.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.234375, | |
| "rewards/margins": 23.0, | |
| "rewards/rejected": -16.6875, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.667779632721202, | |
| "grad_norm": 0.000622183782979846, | |
| "learning_rate": 0.00014284720162376823, | |
| "logits/chosen": -4.0546875, | |
| "logits/rejected": -2.265625, | |
| "logps/chosen": -93.0, | |
| "logps/rejected": -482.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.34375, | |
| "rewards/margins": 24.6875, | |
| "rewards/rejected": -19.3125, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.671118530884808, | |
| "grad_norm": 0.032146863639354706, | |
| "learning_rate": 0.0001426720821577482, | |
| "logits/chosen": -4.1171875, | |
| "logits/rejected": -2.390625, | |
| "logps/chosen": -70.0, | |
| "logps/rejected": -463.0, | |
| "loss": 0.0013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.25, | |
| "rewards/margins": 24.3125, | |
| "rewards/rejected": -19.0625, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.674457429048414, | |
| "grad_norm": 0.0003144640941172838, | |
| "learning_rate": 0.000142496181795357, | |
| "logits/chosen": -4.125, | |
| "logits/rejected": -2.4296875, | |
| "logps/chosen": -91.0, | |
| "logps/rejected": -429.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.671875, | |
| "rewards/margins": 23.3125, | |
| "rewards/rejected": -17.625, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.67779632721202, | |
| "grad_norm": 0.00027810977189801633, | |
| "learning_rate": 0.00014231950272831936, | |
| "logits/chosen": -3.9375, | |
| "logits/rejected": -2.4453125, | |
| "logps/chosen": -92.5, | |
| "logps/rejected": -425.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.21875, | |
| "rewards/margins": 23.8125, | |
| "rewards/rejected": -18.6875, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.6811352253756261, | |
| "grad_norm": 0.12220592051744461, | |
| "learning_rate": 0.00014214204715806271, | |
| "logits/chosen": -3.8828125, | |
| "logits/rejected": -2.5234375, | |
| "logps/chosen": -100.5, | |
| "logps/rejected": -428.0, | |
| "loss": 0.008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.890625, | |
| "rewards/margins": 24.1875, | |
| "rewards/rejected": -18.25, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.6844741235392321, | |
| "grad_norm": 1.1589469067985192e-05, | |
| "learning_rate": 0.00014196381729568983, | |
| "logits/chosen": -3.6640625, | |
| "logits/rejected": -2.3828125, | |
| "logps/chosen": -126.5, | |
| "logps/rejected": -467.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.015625, | |
| "rewards/margins": 25.3125, | |
| "rewards/rejected": -19.25, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.6878130217028381, | |
| "grad_norm": 2.4499566279700957e-05, | |
| "learning_rate": 0.00014178481536195113, | |
| "logits/chosen": -4.203125, | |
| "logits/rejected": -2.6015625, | |
| "logps/chosen": -71.25, | |
| "logps/rejected": -402.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.75, | |
| "rewards/margins": 23.5, | |
| "rewards/rejected": -17.75, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.6911519198664441, | |
| "grad_norm": 0.00026768725365400314, | |
| "learning_rate": 0.000141605043587217, | |
| "logits/chosen": -4.0546875, | |
| "logits/rejected": -2.296875, | |
| "logps/chosen": -73.75, | |
| "logps/rejected": -426.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.3125, | |
| "rewards/margins": 23.4375, | |
| "rewards/rejected": -18.0625, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.6944908180300501, | |
| "grad_norm": 4.719466232927516e-05, | |
| "learning_rate": 0.0001414245042114502, | |
| "logits/chosen": -4.0, | |
| "logits/rejected": -2.4453125, | |
| "logps/chosen": -62.75, | |
| "logps/rejected": -405.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.734375, | |
| "rewards/margins": 23.4375, | |
| "rewards/rejected": -17.75, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.6978297161936561, | |
| "grad_norm": 0.0002969894267152995, | |
| "learning_rate": 0.00014124319948417773, | |
| "logits/chosen": -3.84375, | |
| "logits/rejected": -2.2734375, | |
| "logps/chosen": -88.5, | |
| "logps/rejected": -420.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.359375, | |
| "rewards/margins": 23.1875, | |
| "rewards/rejected": -17.875, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.7011686143572621, | |
| "grad_norm": 0.0002236636937595904, | |
| "learning_rate": 0.000141061131664463, | |
| "logits/chosen": -4.0625, | |
| "logits/rejected": -2.4609375, | |
| "logps/chosen": -71.25, | |
| "logps/rejected": -454.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.5, | |
| "rewards/margins": 25.0, | |
| "rewards/rejected": -19.5, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.7045075125208681, | |
| "grad_norm": 1.3881902305001859e-05, | |
| "learning_rate": 0.00014087830302087742, | |
| "logits/chosen": -3.875, | |
| "logits/rejected": -2.359375, | |
| "logps/chosen": -76.0, | |
| "logps/rejected": -468.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.515625, | |
| "rewards/margins": 25.0625, | |
| "rewards/rejected": -19.5625, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.7078464106844741, | |
| "grad_norm": 0.018235376104712486, | |
| "learning_rate": 0.00014069471583147249, | |
| "logits/chosen": -3.71875, | |
| "logits/rejected": -2.484375, | |
| "logps/chosen": -114.5, | |
| "logps/rejected": -384.0, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.15625, | |
| "rewards/margins": 21.75, | |
| "rewards/rejected": -16.625, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.7111853088480802, | |
| "grad_norm": 0.00014337942411657423, | |
| "learning_rate": 0.00014051037238375103, | |
| "logits/chosen": -3.7421875, | |
| "logits/rejected": -2.5234375, | |
| "logps/chosen": -102.0, | |
| "logps/rejected": -421.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.4375, | |
| "rewards/margins": 23.1875, | |
| "rewards/rejected": -17.75, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.7145242070116862, | |
| "grad_norm": 3.2657169413141673e-06, | |
| "learning_rate": 0.00014032527497463901, | |
| "logits/chosen": -3.5234375, | |
| "logits/rejected": -2.2578125, | |
| "logps/chosen": -108.0, | |
| "logps/rejected": -398.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.796875, | |
| "rewards/margins": 23.0, | |
| "rewards/rejected": -17.1875, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.7178631051752922, | |
| "grad_norm": 0.0009526070207357407, | |
| "learning_rate": 0.00014013942591045668, | |
| "logits/chosen": -3.671875, | |
| "logits/rejected": -2.140625, | |
| "logps/chosen": -107.75, | |
| "logps/rejected": -423.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.84375, | |
| "rewards/margins": 22.75, | |
| "rewards/rejected": -16.9375, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.7212020033388982, | |
| "grad_norm": 0.0013299849815666676, | |
| "learning_rate": 0.00013995282750689001, | |
| "logits/chosen": -3.6171875, | |
| "logits/rejected": -2.2578125, | |
| "logps/chosen": -91.625, | |
| "logps/rejected": -461.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.65625, | |
| "rewards/margins": 24.4375, | |
| "rewards/rejected": -18.75, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.7245409015025042, | |
| "grad_norm": 0.0005564424791373312, | |
| "learning_rate": 0.00013976548208896177, | |
| "logits/chosen": -3.3125, | |
| "logits/rejected": -2.3515625, | |
| "logps/chosen": -120.5, | |
| "logps/rejected": -394.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.203125, | |
| "rewards/margins": 23.5, | |
| "rewards/rejected": -17.3125, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.7278797996661102, | |
| "grad_norm": 0.0002660456520970911, | |
| "learning_rate": 0.00013957739199100248, | |
| "logits/chosen": -3.734375, | |
| "logits/rejected": -2.3203125, | |
| "logps/chosen": -87.5, | |
| "logps/rejected": -453.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.390625, | |
| "rewards/margins": 24.125, | |
| "rewards/rejected": -18.6875, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.7312186978297162, | |
| "grad_norm": 8.14365193946287e-05, | |
| "learning_rate": 0.00013938855955662142, | |
| "logits/chosen": -3.140625, | |
| "logits/rejected": -2.3515625, | |
| "logps/chosen": -108.75, | |
| "logps/rejected": -404.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.921875, | |
| "rewards/margins": 23.125, | |
| "rewards/rejected": -17.125, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.7345575959933222, | |
| "grad_norm": 3.825878229690716e-05, | |
| "learning_rate": 0.00013919898713867754, | |
| "logits/chosen": -3.8359375, | |
| "logits/rejected": -2.4296875, | |
| "logps/chosen": -91.75, | |
| "logps/rejected": -420.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.1875, | |
| "rewards/margins": 23.8125, | |
| "rewards/rejected": -17.59375, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.7378964941569283, | |
| "grad_norm": 0.0003690333687700331, | |
| "learning_rate": 0.00013900867709924978, | |
| "logits/chosen": -3.90625, | |
| "logits/rejected": -2.2734375, | |
| "logps/chosen": -94.25, | |
| "logps/rejected": -448.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.390625, | |
| "rewards/margins": 24.1875, | |
| "rewards/rejected": -18.8125, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.7412353923205343, | |
| "grad_norm": 0.0014959557447582483, | |
| "learning_rate": 0.00013881763180960809, | |
| "logits/chosen": -3.7890625, | |
| "logits/rejected": -2.4609375, | |
| "logps/chosen": -119.75, | |
| "logps/rejected": -417.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.734375, | |
| "rewards/margins": 23.8125, | |
| "rewards/rejected": -18.125, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.7445742904841403, | |
| "grad_norm": 0.0005811589653603733, | |
| "learning_rate": 0.00013862585365018352, | |
| "logits/chosen": -4.0, | |
| "logits/rejected": -2.390625, | |
| "logps/chosen": -109.5, | |
| "logps/rejected": -429.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.8125, | |
| "rewards/margins": 23.6875, | |
| "rewards/rejected": -17.875, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.7479131886477463, | |
| "grad_norm": 0.00026676716515794396, | |
| "learning_rate": 0.00013843334501053878, | |
| "logits/chosen": -3.921875, | |
| "logits/rejected": -2.390625, | |
| "logps/chosen": -88.75, | |
| "logps/rejected": -430.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.46875, | |
| "rewards/margins": 24.0, | |
| "rewards/rejected": -18.5, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.7512520868113522, | |
| "grad_norm": 0.00046272281906567514, | |
| "learning_rate": 0.00013824010828933833, | |
| "logits/chosen": -3.96875, | |
| "logits/rejected": -2.3515625, | |
| "logps/chosen": -70.25, | |
| "logps/rejected": -453.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.53125, | |
| "rewards/margins": 24.8125, | |
| "rewards/rejected": -19.1875, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.7545909849749582, | |
| "grad_norm": 0.0005486281588673592, | |
| "learning_rate": 0.0001380461458943186, | |
| "logits/chosen": -3.8046875, | |
| "logits/rejected": -2.34375, | |
| "logps/chosen": -90.75, | |
| "logps/rejected": -458.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.453125, | |
| "rewards/margins": 25.1875, | |
| "rewards/rejected": -19.75, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.7579298831385642, | |
| "grad_norm": 0.005520923994481564, | |
| "learning_rate": 0.00013785146024225797, | |
| "logits/chosen": -4.359375, | |
| "logits/rejected": -2.46875, | |
| "logps/chosen": -94.0, | |
| "logps/rejected": -392.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.59375, | |
| "rewards/margins": 22.75, | |
| "rewards/rejected": -17.125, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.7612687813021702, | |
| "grad_norm": 8.077368693193421e-05, | |
| "learning_rate": 0.0001376560537589465, | |
| "logits/chosen": -3.5625, | |
| "logits/rejected": -2.2734375, | |
| "logps/chosen": -82.0, | |
| "logps/rejected": -415.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.796875, | |
| "rewards/margins": 23.125, | |
| "rewards/rejected": -18.375, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.7646076794657763, | |
| "grad_norm": 0.00024681369541212916, | |
| "learning_rate": 0.000137459928879156, | |
| "logits/chosen": -4.21875, | |
| "logits/rejected": -2.15625, | |
| "logps/chosen": -76.25, | |
| "logps/rejected": -468.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.03125, | |
| "rewards/margins": 24.75, | |
| "rewards/rejected": -19.75, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.7679465776293823, | |
| "grad_norm": 2.9520870157284662e-06, | |
| "learning_rate": 0.00013726308804660938, | |
| "logits/chosen": -3.796875, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -99.5, | |
| "logps/rejected": -455.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.296875, | |
| "rewards/margins": 25.3125, | |
| "rewards/rejected": -20.0, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.7712854757929883, | |
| "grad_norm": 0.4641003906726837, | |
| "learning_rate": 0.00013706553371395044, | |
| "logits/chosen": -3.8046875, | |
| "logits/rejected": -2.375, | |
| "logps/chosen": -94.75, | |
| "logps/rejected": -438.0, | |
| "loss": 0.0183, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 5.0625, | |
| "rewards/margins": 23.5, | |
| "rewards/rejected": -18.375, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.7746243739565943, | |
| "grad_norm": 0.0002731184067670256, | |
| "learning_rate": 0.00013686726834271316, | |
| "logits/chosen": -3.71875, | |
| "logits/rejected": -2.3203125, | |
| "logps/chosen": -89.25, | |
| "logps/rejected": -367.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.734375, | |
| "rewards/margins": 21.8125, | |
| "rewards/rejected": -16.03125, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.7779632721202003, | |
| "grad_norm": 0.02955230325460434, | |
| "learning_rate": 0.00013666829440329113, | |
| "logits/chosen": -4.1875, | |
| "logits/rejected": -2.203125, | |
| "logps/chosen": -59.125, | |
| "logps/rejected": -431.0, | |
| "loss": 0.0012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.296875, | |
| "rewards/margins": 23.625, | |
| "rewards/rejected": -18.375, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.7813021702838063, | |
| "grad_norm": 3.0029235858819447e-05, | |
| "learning_rate": 0.00013646861437490674, | |
| "logits/chosen": -3.7265625, | |
| "logits/rejected": -2.2265625, | |
| "logps/chosen": -98.25, | |
| "logps/rejected": -416.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.625, | |
| "rewards/margins": 23.375, | |
| "rewards/rejected": -17.75, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.7846410684474123, | |
| "grad_norm": 0.018931280821561813, | |
| "learning_rate": 0.00013626823074558019, | |
| "logits/chosen": -3.890625, | |
| "logits/rejected": -2.203125, | |
| "logps/chosen": -89.0, | |
| "logps/rejected": -415.0, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.03125, | |
| "rewards/margins": 22.75, | |
| "rewards/rejected": -17.6875, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.7879799666110183, | |
| "grad_norm": 3.846998879453167e-05, | |
| "learning_rate": 0.00013606714601209865, | |
| "logits/chosen": -3.5234375, | |
| "logits/rejected": -2.1953125, | |
| "logps/chosen": -92.0, | |
| "logps/rejected": -404.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.515625, | |
| "rewards/margins": 22.625, | |
| "rewards/rejected": -17.125, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.7913188647746243, | |
| "grad_norm": 0.00047268884372897446, | |
| "learning_rate": 0.00013586536267998504, | |
| "logits/chosen": -3.96875, | |
| "logits/rejected": -2.1171875, | |
| "logps/chosen": -90.25, | |
| "logps/rejected": -482.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.109375, | |
| "rewards/margins": 25.9375, | |
| "rewards/rejected": -19.8125, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.7946577629382304, | |
| "grad_norm": 0.00010255785309709609, | |
| "learning_rate": 0.00013566288326346683, | |
| "logits/chosen": -3.9765625, | |
| "logits/rejected": -2.140625, | |
| "logps/chosen": -76.0, | |
| "logps/rejected": -383.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.5, | |
| "rewards/margins": 22.9375, | |
| "rewards/rejected": -17.4375, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.7979966611018364, | |
| "grad_norm": 0.00026641954900696874, | |
| "learning_rate": 0.0001354597102854448, | |
| "logits/chosen": -3.71875, | |
| "logits/rejected": -2.21875, | |
| "logps/chosen": -88.25, | |
| "logps/rejected": -368.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.875, | |
| "rewards/margins": 22.1875, | |
| "rewards/rejected": -16.3125, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.8013355592654424, | |
| "grad_norm": 0.0015892143128439784, | |
| "learning_rate": 0.00013525584627746142, | |
| "logits/chosen": -3.6328125, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -98.5, | |
| "logps/rejected": -400.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.515625, | |
| "rewards/margins": 22.375, | |
| "rewards/rejected": -16.90625, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.8046744574290484, | |
| "grad_norm": 0.0008768016705289483, | |
| "learning_rate": 0.0001350512937796695, | |
| "logits/chosen": -4.09375, | |
| "logits/rejected": -2.12890625, | |
| "logps/chosen": -61.875, | |
| "logps/rejected": -414.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.453125, | |
| "rewards/margins": 22.75, | |
| "rewards/rejected": -17.375, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.8080133555926544, | |
| "grad_norm": 0.0008365919347852468, | |
| "learning_rate": 0.00013484605534080045, | |
| "logits/chosen": -3.484375, | |
| "logits/rejected": -2.02734375, | |
| "logps/chosen": -90.25, | |
| "logps/rejected": -382.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.65625, | |
| "rewards/margins": 21.625, | |
| "rewards/rejected": -15.96875, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.8113522537562604, | |
| "grad_norm": 0.0011707853991538286, | |
| "learning_rate": 0.00013464013351813248, | |
| "logits/chosen": -3.8125, | |
| "logits/rejected": -2.1953125, | |
| "logps/chosen": -75.5, | |
| "logps/rejected": -402.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.734375, | |
| "rewards/margins": 22.5, | |
| "rewards/rejected": -16.75, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.8146911519198664, | |
| "grad_norm": 0.05999299883842468, | |
| "learning_rate": 0.00013443353087745885, | |
| "logits/chosen": -3.546875, | |
| "logits/rejected": -2.0390625, | |
| "logps/chosen": -119.75, | |
| "logps/rejected": -389.0, | |
| "loss": 0.004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.296875, | |
| "rewards/margins": 21.375, | |
| "rewards/rejected": -15.09375, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.8180300500834724, | |
| "grad_norm": 1.330207032879116e-05, | |
| "learning_rate": 0.00013422624999305578, | |
| "logits/chosen": -3.25, | |
| "logits/rejected": -2.0859375, | |
| "logps/chosen": -101.0, | |
| "logps/rejected": -434.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.625, | |
| "rewards/margins": 23.3125, | |
| "rewards/rejected": -17.625, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.8213689482470785, | |
| "grad_norm": 0.001336806220933795, | |
| "learning_rate": 0.00013401829344765045, | |
| "logits/chosen": -3.65625, | |
| "logits/rejected": -2.078125, | |
| "logps/chosen": -93.75, | |
| "logps/rejected": -427.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.109375, | |
| "rewards/margins": 22.0625, | |
| "rewards/rejected": -17.0, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.8247078464106845, | |
| "grad_norm": 1.3096532711642794e-05, | |
| "learning_rate": 0.00013380966383238883, | |
| "logits/chosen": -3.5625, | |
| "logits/rejected": -2.125, | |
| "logps/chosen": -95.25, | |
| "logps/rejected": -477.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.21875, | |
| "rewards/margins": 24.9375, | |
| "rewards/rejected": -19.625, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.8280467445742905, | |
| "grad_norm": 0.0002120399149134755, | |
| "learning_rate": 0.00013360036374680334, | |
| "logits/chosen": -3.5625, | |
| "logits/rejected": -1.796875, | |
| "logps/chosen": -82.75, | |
| "logps/rejected": -436.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.5, | |
| "rewards/margins": 23.5625, | |
| "rewards/rejected": -18.0625, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.8313856427378965, | |
| "grad_norm": 0.0001271862565772608, | |
| "learning_rate": 0.0001333903957987805, | |
| "logits/chosen": -3.46875, | |
| "logits/rejected": -2.15625, | |
| "logps/chosen": -80.25, | |
| "logps/rejected": -415.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.859375, | |
| "rewards/margins": 24.25, | |
| "rewards/rejected": -18.4375, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.8347245409015025, | |
| "grad_norm": 1.912344669108279e-05, | |
| "learning_rate": 0.00013317976260452836, | |
| "logits/chosen": -3.4765625, | |
| "logits/rejected": -1.74609375, | |
| "logps/chosen": -108.75, | |
| "logps/rejected": -422.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.1875, | |
| "rewards/margins": 22.9375, | |
| "rewards/rejected": -16.78125, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.8380634390651085, | |
| "grad_norm": 0.0015317240031436086, | |
| "learning_rate": 0.00013296846678854406, | |
| "logits/chosen": -3.7265625, | |
| "logits/rejected": -2.109375, | |
| "logps/chosen": -84.0, | |
| "logps/rejected": -411.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.59375, | |
| "rewards/margins": 23.625, | |
| "rewards/rejected": -18.0, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.8414023372287145, | |
| "grad_norm": 0.013954302296042442, | |
| "learning_rate": 0.0001327565109835809, | |
| "logits/chosen": -3.578125, | |
| "logits/rejected": -2.078125, | |
| "logps/chosen": -119.5, | |
| "logps/rejected": -471.0, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.25, | |
| "rewards/margins": 24.0, | |
| "rewards/rejected": -18.75, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.8447412353923205, | |
| "grad_norm": 0.0019699318800121546, | |
| "learning_rate": 0.00013254389783061584, | |
| "logits/chosen": -3.6953125, | |
| "logits/rejected": -1.88671875, | |
| "logps/chosen": -105.75, | |
| "logps/rejected": -401.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.484375, | |
| "rewards/margins": 22.625, | |
| "rewards/rejected": -17.15625, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.8480801335559266, | |
| "grad_norm": 0.03861398622393608, | |
| "learning_rate": 0.00013233062997881627, | |
| "logits/chosen": -3.453125, | |
| "logits/rejected": -2.0859375, | |
| "logps/chosen": -104.75, | |
| "logps/rejected": -402.0, | |
| "loss": 0.0021, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.5625, | |
| "rewards/margins": 22.25, | |
| "rewards/rejected": -16.6875, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.8514190317195326, | |
| "grad_norm": 8.354683086508885e-05, | |
| "learning_rate": 0.00013211671008550718, | |
| "logits/chosen": -3.7265625, | |
| "logits/rejected": -1.89453125, | |
| "logps/chosen": -89.25, | |
| "logps/rejected": -417.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.46875, | |
| "rewards/margins": 23.0625, | |
| "rewards/rejected": -17.625, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.8547579298831386, | |
| "grad_norm": 0.015042081475257874, | |
| "learning_rate": 0.0001319021408161381, | |
| "logits/chosen": -3.671875, | |
| "logits/rejected": -2.078125, | |
| "logps/chosen": -89.0, | |
| "logps/rejected": -421.0, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.78125, | |
| "rewards/margins": 24.375, | |
| "rewards/rejected": -18.5625, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.8580968280467446, | |
| "grad_norm": 0.0004783151962328702, | |
| "learning_rate": 0.0001316869248442497, | |
| "logits/chosen": -3.5078125, | |
| "logits/rejected": -2.15625, | |
| "logps/chosen": -100.25, | |
| "logps/rejected": -445.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.21875, | |
| "rewards/margins": 24.3125, | |
| "rewards/rejected": -19.0625, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.8614357262103506, | |
| "grad_norm": 0.11549096554517746, | |
| "learning_rate": 0.00013147106485144068, | |
| "logits/chosen": -3.5703125, | |
| "logits/rejected": -1.9765625, | |
| "logps/chosen": -98.0, | |
| "logps/rejected": -449.0, | |
| "loss": 0.0047, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.953125, | |
| "rewards/margins": 23.625, | |
| "rewards/rejected": -18.625, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.8647746243739566, | |
| "grad_norm": 0.005697562824934721, | |
| "learning_rate": 0.00013125456352733423, | |
| "logits/chosen": -3.5859375, | |
| "logits/rejected": -2.0859375, | |
| "logps/chosen": -104.5, | |
| "logps/rejected": -425.0, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.3125, | |
| "rewards/margins": 23.375, | |
| "rewards/rejected": -18.0625, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.8681135225375626, | |
| "grad_norm": 0.07381512969732285, | |
| "learning_rate": 0.0001310374235695445, | |
| "logits/chosen": -3.328125, | |
| "logits/rejected": -1.92578125, | |
| "logps/chosen": -98.25, | |
| "logps/rejected": -465.0, | |
| "loss": 0.0023, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.296875, | |
| "rewards/margins": 25.9375, | |
| "rewards/rejected": -20.625, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.8714524207011686, | |
| "grad_norm": 0.004491521045565605, | |
| "learning_rate": 0.00013081964768364308, | |
| "logits/chosen": -3.546875, | |
| "logits/rejected": -2.140625, | |
| "logps/chosen": -95.5, | |
| "logps/rejected": -450.0, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.28125, | |
| "rewards/margins": 24.875, | |
| "rewards/rejected": -19.625, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.8747913188647746, | |
| "grad_norm": 0.00013118820788804442, | |
| "learning_rate": 0.0001306012385831253, | |
| "logits/chosen": -3.765625, | |
| "logits/rejected": -2.109375, | |
| "logps/chosen": -106.25, | |
| "logps/rejected": -419.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.734375, | |
| "rewards/margins": 23.4375, | |
| "rewards/rejected": -18.75, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.8781302170283807, | |
| "grad_norm": 0.00017179777205456048, | |
| "learning_rate": 0.00013038219898937629, | |
| "logits/chosen": -3.875, | |
| "logits/rejected": -2.1171875, | |
| "logps/chosen": -63.125, | |
| "logps/rejected": -422.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.171875, | |
| "rewards/margins": 23.75, | |
| "rewards/rejected": -18.5625, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.8814691151919867, | |
| "grad_norm": 0.00010286461474606767, | |
| "learning_rate": 0.00013016253163163714, | |
| "logits/chosen": -3.4765625, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -100.25, | |
| "logps/rejected": -409.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.046875, | |
| "rewards/margins": 22.875, | |
| "rewards/rejected": -17.8125, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.8848080133555927, | |
| "grad_norm": 0.0005210632225498557, | |
| "learning_rate": 0.000129942239246971, | |
| "logits/chosen": -3.765625, | |
| "logits/rejected": -2.1015625, | |
| "logps/chosen": -90.75, | |
| "logps/rejected": -473.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.671875, | |
| "rewards/margins": 26.4375, | |
| "rewards/rejected": -20.6875, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.8881469115191987, | |
| "grad_norm": 0.0008880659588612616, | |
| "learning_rate": 0.00012972132458022878, | |
| "logits/chosen": -3.53125, | |
| "logits/rejected": -1.99609375, | |
| "logps/chosen": -104.5, | |
| "logps/rejected": -401.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.890625, | |
| "rewards/margins": 22.1875, | |
| "rewards/rejected": -17.1875, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.8914858096828047, | |
| "grad_norm": 7.1963854679779615e-06, | |
| "learning_rate": 0.00012949979038401503, | |
| "logits/chosen": -3.2734375, | |
| "logits/rejected": -2.125, | |
| "logps/chosen": -110.0, | |
| "logps/rejected": -424.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.5, | |
| "rewards/margins": 24.125, | |
| "rewards/rejected": -18.625, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.8948247078464107, | |
| "grad_norm": 0.0002867156290449202, | |
| "learning_rate": 0.00012927763941865378, | |
| "logits/chosen": -4.1875, | |
| "logits/rejected": -1.87109375, | |
| "logps/chosen": -86.75, | |
| "logps/rejected": -478.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.21875, | |
| "rewards/margins": 26.6875, | |
| "rewards/rejected": -21.4375, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.8981636060100167, | |
| "grad_norm": 0.7056188583374023, | |
| "learning_rate": 0.00012905487445215394, | |
| "logits/chosen": -4.0703125, | |
| "logits/rejected": -2.078125, | |
| "logps/chosen": -92.75, | |
| "logps/rejected": -399.0, | |
| "loss": 0.2617, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 5.546875, | |
| "rewards/margins": 22.75, | |
| "rewards/rejected": -17.25, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.9015025041736227, | |
| "grad_norm": 3.4810282159014605e-06, | |
| "learning_rate": 0.0001288314982601749, | |
| "logits/chosen": -4.1171875, | |
| "logits/rejected": -2.2109375, | |
| "logps/chosen": -105.25, | |
| "logps/rejected": -473.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.5625, | |
| "rewards/margins": 27.75, | |
| "rewards/rejected": -22.1875, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.9048414023372288, | |
| "grad_norm": 2.5784022000152618e-05, | |
| "learning_rate": 0.00012860751362599193, | |
| "logits/chosen": -4.0078125, | |
| "logits/rejected": -2.296875, | |
| "logps/chosen": -79.125, | |
| "logps/rejected": -472.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.34375, | |
| "rewards/margins": 28.875, | |
| "rewards/rejected": -23.5625, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.9081803005008348, | |
| "grad_norm": 5.8086599892703816e-05, | |
| "learning_rate": 0.00012838292334046156, | |
| "logits/chosen": -4.59375, | |
| "logits/rejected": -2.25, | |
| "logps/chosen": -84.0, | |
| "logps/rejected": -514.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.390625, | |
| "rewards/margins": 30.25, | |
| "rewards/rejected": -25.875, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.9115191986644408, | |
| "grad_norm": 2.144264362868853e-05, | |
| "learning_rate": 0.00012815773020198674, | |
| "logits/chosen": -4.703125, | |
| "logits/rejected": -2.265625, | |
| "logps/chosen": -87.75, | |
| "logps/rejected": -630.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.8046875, | |
| "rewards/margins": 35.375, | |
| "rewards/rejected": -31.625, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.9148580968280468, | |
| "grad_norm": 0.010493806563317776, | |
| "learning_rate": 0.00012793193701648195, | |
| "logits/chosen": -4.046875, | |
| "logits/rejected": -2.328125, | |
| "logps/chosen": -98.0, | |
| "logps/rejected": -568.0, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.09375, | |
| "rewards/margins": 35.625, | |
| "rewards/rejected": -31.5625, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.9181969949916527, | |
| "grad_norm": 0.0006047628121450543, | |
| "learning_rate": 0.0001277055465973383, | |
| "logits/chosen": -4.796875, | |
| "logits/rejected": -2.640625, | |
| "logps/chosen": -93.0, | |
| "logps/rejected": -584.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.6484375, | |
| "rewards/margins": 37.375, | |
| "rewards/rejected": -33.8125, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.9215358931552587, | |
| "grad_norm": 0.0015730452723801136, | |
| "learning_rate": 0.0001274785617653885, | |
| "logits/chosen": -5.0, | |
| "logits/rejected": -2.8203125, | |
| "logps/chosen": -99.25, | |
| "logps/rejected": -501.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.8671875, | |
| "rewards/margins": 33.8125, | |
| "rewards/rejected": -29.9375, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.9248747913188647, | |
| "grad_norm": 2.2766906113247387e-05, | |
| "learning_rate": 0.00012725098534887162, | |
| "logits/chosen": -4.59375, | |
| "logits/rejected": -2.7265625, | |
| "logps/chosen": -106.75, | |
| "logps/rejected": -640.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.3203125, | |
| "rewards/margins": 41.875, | |
| "rewards/rejected": -38.5, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.9282136894824707, | |
| "grad_norm": 4.425516090122983e-05, | |
| "learning_rate": 0.00012702282018339786, | |
| "logits/chosen": -5.125, | |
| "logits/rejected": -2.9453125, | |
| "logps/chosen": -107.75, | |
| "logps/rejected": -592.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0625, | |
| "rewards/margins": 40.75, | |
| "rewards/rejected": -37.625, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.9315525876460768, | |
| "grad_norm": 0.002112521091476083, | |
| "learning_rate": 0.00012679406911191333, | |
| "logits/chosen": -4.90625, | |
| "logits/rejected": -2.921875, | |
| "logps/chosen": -108.75, | |
| "logps/rejected": -564.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.59375, | |
| "rewards/margins": 38.75, | |
| "rewards/rejected": -35.25, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.9348914858096828, | |
| "grad_norm": 6.724369995936286e-06, | |
| "learning_rate": 0.00012656473498466446, | |
| "logits/chosen": -5.1875, | |
| "logits/rejected": -2.9921875, | |
| "logps/chosen": -115.75, | |
| "logps/rejected": -666.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9921875, | |
| "rewards/margins": 45.5, | |
| "rewards/rejected": -42.625, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.9382303839732888, | |
| "grad_norm": 9.46976160776103e-06, | |
| "learning_rate": 0.00012633482065916267, | |
| "logits/chosen": -5.375, | |
| "logits/rejected": -2.875, | |
| "logps/chosen": -127.75, | |
| "logps/rejected": -700.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.328125, | |
| "rewards/margins": 46.375, | |
| "rewards/rejected": -44.0, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.9415692821368948, | |
| "grad_norm": 0.0018350208410993218, | |
| "learning_rate": 0.00012610432900014864, | |
| "logits/chosen": -5.140625, | |
| "logits/rejected": -3.078125, | |
| "logps/chosen": -132.5, | |
| "logps/rejected": -660.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.2890625, | |
| "rewards/margins": 46.25, | |
| "rewards/rejected": -43.875, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.9449081803005008, | |
| "grad_norm": 0.5556798577308655, | |
| "learning_rate": 0.0001258732628795566, | |
| "logits/chosen": -5.234375, | |
| "logits/rejected": -3.1875, | |
| "logps/chosen": -106.5, | |
| "logps/rejected": -670.0, | |
| "loss": 0.023, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 2.984375, | |
| "rewards/margins": 47.125, | |
| "rewards/rejected": -44.25, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.9482470784641068, | |
| "grad_norm": 5.686655276804231e-05, | |
| "learning_rate": 0.00012564162517647863, | |
| "logits/chosen": -5.375, | |
| "logits/rejected": -3.03125, | |
| "logps/chosen": -91.0, | |
| "logps/rejected": -756.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.84375, | |
| "rewards/margins": 51.375, | |
| "rewards/rejected": -48.5, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.9515859766277128, | |
| "grad_norm": 1.8947954231407493e-05, | |
| "learning_rate": 0.00012540941877712877, | |
| "logits/chosen": -4.875, | |
| "logits/rejected": -2.9921875, | |
| "logps/chosen": -112.5, | |
| "logps/rejected": -714.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.5703125, | |
| "rewards/margins": 50.625, | |
| "rewards/rejected": -47.0, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.9549248747913188, | |
| "grad_norm": 2.8031481633661315e-05, | |
| "learning_rate": 0.00012517664657480694, | |
| "logits/chosen": -4.765625, | |
| "logits/rejected": -2.890625, | |
| "logps/chosen": -124.25, | |
| "logps/rejected": -710.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1015625, | |
| "rewards/margins": 48.375, | |
| "rewards/rejected": -45.375, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.9582637729549248, | |
| "grad_norm": 1.9544756924005924e-06, | |
| "learning_rate": 0.00012494331146986314, | |
| "logits/chosen": -5.109375, | |
| "logits/rejected": -2.875, | |
| "logps/chosen": -100.0, | |
| "logps/rejected": -704.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.59375, | |
| "rewards/margins": 48.75, | |
| "rewards/rejected": -45.125, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.9616026711185309, | |
| "grad_norm": 1.1154845103167332e-10, | |
| "learning_rate": 0.00012470941636966103, | |
| "logits/chosen": -5.15625, | |
| "logits/rejected": -2.8828125, | |
| "logps/chosen": -94.0, | |
| "logps/rejected": -734.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.9609375, | |
| "rewards/margins": 51.375, | |
| "rewards/rejected": -47.5, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.9649415692821369, | |
| "grad_norm": 1.1422841453168076e-05, | |
| "learning_rate": 0.00012447496418854188, | |
| "logits/chosen": -5.1875, | |
| "logits/rejected": -2.921875, | |
| "logps/chosen": -122.25, | |
| "logps/rejected": -720.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.640625, | |
| "rewards/margins": 50.25, | |
| "rewards/rejected": -46.625, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.9682804674457429, | |
| "grad_norm": 1.922987102886964e-08, | |
| "learning_rate": 0.00012423995784778817, | |
| "logits/chosen": -5.0625, | |
| "logits/rejected": -3.0, | |
| "logps/chosen": -109.0, | |
| "logps/rejected": -722.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.578125, | |
| "rewards/margins": 50.125, | |
| "rewards/rejected": -46.625, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.9716193656093489, | |
| "grad_norm": 0.7204757332801819, | |
| "learning_rate": 0.00012400440027558732, | |
| "logits/chosen": -5.09375, | |
| "logits/rejected": -2.859375, | |
| "logps/chosen": -120.75, | |
| "logps/rejected": -694.0, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.546875, | |
| "rewards/margins": 45.875, | |
| "rewards/rejected": -43.25, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.9749582637729549, | |
| "grad_norm": 7.915846822470485e-08, | |
| "learning_rate": 0.000123768294406995, | |
| "logits/chosen": -5.03125, | |
| "logits/rejected": -3.1484375, | |
| "logps/chosen": -118.75, | |
| "logps/rejected": -678.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.5625, | |
| "rewards/margins": 49.125, | |
| "rewards/rejected": -44.625, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.9782971619365609, | |
| "grad_norm": 3.635519169620238e-05, | |
| "learning_rate": 0.00012353164318389874, | |
| "logits/chosen": -4.96875, | |
| "logits/rejected": -3.109375, | |
| "logps/chosen": -99.25, | |
| "logps/rejected": -630.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.265625, | |
| "rewards/margins": 45.25, | |
| "rewards/rejected": -41.0, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.9816360601001669, | |
| "grad_norm": 2.1648361325787846e-06, | |
| "learning_rate": 0.00012329444955498116, | |
| "logits/chosen": -4.9375, | |
| "logits/rejected": -2.734375, | |
| "logps/chosen": -107.75, | |
| "logps/rejected": -664.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.734375, | |
| "rewards/margins": 47.625, | |
| "rewards/rejected": -42.875, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.9849749582637729, | |
| "grad_norm": 4.366306711744983e-07, | |
| "learning_rate": 0.00012305671647568338, | |
| "logits/chosen": -4.890625, | |
| "logits/rejected": -2.9375, | |
| "logps/chosen": -116.75, | |
| "logps/rejected": -668.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.921875, | |
| "rewards/margins": 46.375, | |
| "rewards/rejected": -41.5, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.988313856427379, | |
| "grad_norm": 9.796775884751696e-06, | |
| "learning_rate": 0.00012281844690816793, | |
| "logits/chosen": -4.3125, | |
| "logits/rejected": -2.65625, | |
| "logps/chosen": -115.75, | |
| "logps/rejected": -678.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.609375, | |
| "rewards/margins": 46.375, | |
| "rewards/rejected": -41.75, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.991652754590985, | |
| "grad_norm": 1.267866196030809e-06, | |
| "learning_rate": 0.0001225796438212822, | |
| "logits/chosen": -4.484375, | |
| "logits/rejected": -2.828125, | |
| "logps/chosen": -81.0, | |
| "logps/rejected": -646.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.265625, | |
| "rewards/margins": 46.75, | |
| "rewards/rejected": -41.375, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.994991652754591, | |
| "grad_norm": 7.400533519330565e-10, | |
| "learning_rate": 0.00012234031019052103, | |
| "logits/chosen": -5.25, | |
| "logits/rejected": -2.8125, | |
| "logps/chosen": -72.75, | |
| "logps/rejected": -664.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.203125, | |
| "rewards/margins": 46.125, | |
| "rewards/rejected": -41.0, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.998330550918197, | |
| "grad_norm": 1.766308876938183e-08, | |
| "learning_rate": 0.00012210044899799003, | |
| "logits/chosen": -4.171875, | |
| "logits/rejected": -2.765625, | |
| "logps/chosen": -90.5, | |
| "logps/rejected": -684.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.875, | |
| "rewards/margins": 48.0, | |
| "rewards/rejected": -42.125, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 3.3598018944758223e-06, | |
| "learning_rate": 0.00012186006323236816, | |
| "logits/chosen": -4.0625, | |
| "logits/rejected": -2.6875, | |
| "logps/chosen": -91.0, | |
| "logps/rejected": -680.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.40625, | |
| "rewards/margins": 48.5, | |
| "rewards/rejected": -44.0, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.003338898163606, | |
| "grad_norm": 4.096188988000904e-08, | |
| "learning_rate": 0.00012161915588887058, | |
| "logits/chosen": -4.53125, | |
| "logits/rejected": -2.8671875, | |
| "logps/chosen": -94.25, | |
| "logps/rejected": -626.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.234375, | |
| "rewards/margins": 43.875, | |
| "rewards/rejected": -38.625, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.006677796327212, | |
| "grad_norm": 1.8207643734058365e-05, | |
| "learning_rate": 0.0001213777299692114, | |
| "logits/chosen": -4.5, | |
| "logits/rejected": -2.609375, | |
| "logps/chosen": -110.5, | |
| "logps/rejected": -604.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.578125, | |
| "rewards/margins": 41.5, | |
| "rewards/rejected": -35.875, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.010016694490818, | |
| "grad_norm": 3.5500662409759798e-09, | |
| "learning_rate": 0.00012113578848156614, | |
| "logits/chosen": -4.546875, | |
| "logits/rejected": -2.71875, | |
| "logps/chosen": -87.0, | |
| "logps/rejected": -658.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.546875, | |
| "rewards/margins": 46.25, | |
| "rewards/rejected": -40.625, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.013355592654424, | |
| "grad_norm": 0.0002687852829694748, | |
| "learning_rate": 0.00012089333444053437, | |
| "logits/chosen": -4.46875, | |
| "logits/rejected": -2.6796875, | |
| "logps/chosen": -76.5, | |
| "logps/rejected": -600.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.296875, | |
| "rewards/margins": 41.25, | |
| "rewards/rejected": -35.875, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.01669449081803, | |
| "grad_norm": 7.73879182958126e-09, | |
| "learning_rate": 0.000120650370867102, | |
| "logits/chosen": -3.875, | |
| "logits/rejected": -2.84375, | |
| "logps/chosen": -98.0, | |
| "logps/rejected": -630.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.703125, | |
| "rewards/margins": 44.25, | |
| "rewards/rejected": -38.5, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.020033388981636, | |
| "grad_norm": 1.89209330869744e-07, | |
| "learning_rate": 0.0001204069007886038, | |
| "logits/chosen": -4.4375, | |
| "logits/rejected": -2.703125, | |
| "logps/chosen": -91.0, | |
| "logps/rejected": -594.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.984375, | |
| "rewards/margins": 41.5, | |
| "rewards/rejected": -36.5, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.023372287145242, | |
| "grad_norm": 6.296863119814589e-08, | |
| "learning_rate": 0.0001201629272386856, | |
| "logits/chosen": -4.140625, | |
| "logits/rejected": -2.8359375, | |
| "logps/chosen": -80.5, | |
| "logps/rejected": -604.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.234375, | |
| "rewards/margins": 43.125, | |
| "rewards/rejected": -38.0, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.026711185308848, | |
| "grad_norm": 1.2057295862177853e-05, | |
| "learning_rate": 0.00011991845325726657, | |
| "logits/chosen": -4.875, | |
| "logits/rejected": -2.9765625, | |
| "logps/chosen": -71.125, | |
| "logps/rejected": -582.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.90625, | |
| "rewards/margins": 41.125, | |
| "rewards/rejected": -36.25, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.0300500834724542, | |
| "grad_norm": 1.0573453579354464e-07, | |
| "learning_rate": 0.00011967348189050114, | |
| "logits/chosen": -4.828125, | |
| "logits/rejected": -2.8671875, | |
| "logps/chosen": -80.25, | |
| "logps/rejected": -632.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.0, | |
| "rewards/margins": 44.625, | |
| "rewards/rejected": -38.75, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.0333889816360602, | |
| "grad_norm": 8.637631481178687e-07, | |
| "learning_rate": 0.00011942801619074128, | |
| "logits/chosen": -4.84375, | |
| "logits/rejected": -2.6953125, | |
| "logps/chosen": -76.25, | |
| "logps/rejected": -642.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.515625, | |
| "rewards/margins": 44.5, | |
| "rewards/rejected": -38.875, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.0367278797996662, | |
| "grad_norm": 2.945505617191202e-09, | |
| "learning_rate": 0.00011918205921649828, | |
| "logits/chosen": -4.46875, | |
| "logits/rejected": -2.7421875, | |
| "logps/chosen": -92.75, | |
| "logps/rejected": -596.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.90625, | |
| "rewards/margins": 43.25, | |
| "rewards/rejected": -37.375, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.0400667779632722, | |
| "grad_norm": 3.719551671110821e-07, | |
| "learning_rate": 0.00011893561403240484, | |
| "logits/chosen": -4.53125, | |
| "logits/rejected": -2.7265625, | |
| "logps/chosen": -84.125, | |
| "logps/rejected": -622.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.671875, | |
| "rewards/margins": 42.75, | |
| "rewards/rejected": -37.125, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.0434056761268782, | |
| "grad_norm": 4.653336205251435e-08, | |
| "learning_rate": 0.0001186886837091767, | |
| "logits/chosen": -4.5625, | |
| "logits/rejected": -2.5625, | |
| "logps/chosen": -93.0, | |
| "logps/rejected": -646.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.40625, | |
| "rewards/margins": 43.375, | |
| "rewards/rejected": -38.125, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.0467445742904842, | |
| "grad_norm": 3.053295358768082e-07, | |
| "learning_rate": 0.00011844127132357443, | |
| "logits/chosen": -3.7421875, | |
| "logits/rejected": -2.75, | |
| "logps/chosen": -102.5, | |
| "logps/rejected": -616.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.546875, | |
| "rewards/margins": 42.625, | |
| "rewards/rejected": -38.0, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.0500834724540902, | |
| "grad_norm": 2.1130126981461217e-08, | |
| "learning_rate": 0.00011819337995836521, | |
| "logits/chosen": -4.046875, | |
| "logits/rejected": -2.765625, | |
| "logps/chosen": -117.25, | |
| "logps/rejected": -642.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.171875, | |
| "rewards/margins": 43.125, | |
| "rewards/rejected": -37.0, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.0534223706176962, | |
| "grad_norm": 3.6415363524611166e-07, | |
| "learning_rate": 0.00011794501270228418, | |
| "logits/chosen": -4.28125, | |
| "logits/rejected": -2.484375, | |
| "logps/chosen": -103.25, | |
| "logps/rejected": -600.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.578125, | |
| "rewards/margins": 40.0, | |
| "rewards/rejected": -34.375, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.0567612687813022, | |
| "grad_norm": 2.995850678644274e-08, | |
| "learning_rate": 0.00011769617264999628, | |
| "logits/chosen": -4.40625, | |
| "logits/rejected": -2.71875, | |
| "logps/chosen": -94.75, | |
| "logps/rejected": -610.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.171875, | |
| "rewards/margins": 41.75, | |
| "rewards/rejected": -36.625, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.0601001669449082, | |
| "grad_norm": 1.6123553880920127e-10, | |
| "learning_rate": 0.0001174468629020573, | |
| "logits/chosen": -4.484375, | |
| "logits/rejected": -2.703125, | |
| "logps/chosen": -90.75, | |
| "logps/rejected": -628.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.515625, | |
| "rewards/margins": 43.875, | |
| "rewards/rejected": -38.375, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.0634390651085142, | |
| "grad_norm": 5.522470019059256e-07, | |
| "learning_rate": 0.00011719708656487565, | |
| "logits/chosen": -4.484375, | |
| "logits/rejected": -2.6953125, | |
| "logps/chosen": -86.25, | |
| "logps/rejected": -606.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.3125, | |
| "rewards/margins": 41.625, | |
| "rewards/rejected": -36.25, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.0667779632721202, | |
| "grad_norm": 3.308125773315851e-09, | |
| "learning_rate": 0.0001169468467506733, | |
| "logits/chosen": -4.5, | |
| "logits/rejected": -2.65625, | |
| "logps/chosen": -88.75, | |
| "logps/rejected": -600.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.53125, | |
| "rewards/margins": 41.375, | |
| "rewards/rejected": -35.75, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.0701168614357262, | |
| "grad_norm": 9.489669139384205e-08, | |
| "learning_rate": 0.00011669614657744725, | |
| "logits/chosen": -4.203125, | |
| "logits/rejected": -2.6953125, | |
| "logps/chosen": -99.75, | |
| "logps/rejected": -578.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.453125, | |
| "rewards/margins": 40.25, | |
| "rewards/rejected": -34.875, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.0734557595993321, | |
| "grad_norm": 5.833362126672625e-10, | |
| "learning_rate": 0.0001164449891689306, | |
| "logits/chosen": -4.625, | |
| "logits/rejected": -2.6875, | |
| "logps/chosen": -80.25, | |
| "logps/rejected": -660.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.296875, | |
| "rewards/margins": 44.125, | |
| "rewards/rejected": -38.875, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.0767946577629381, | |
| "grad_norm": 3.6675750436643284e-08, | |
| "learning_rate": 0.00011619337765455356, | |
| "logits/chosen": -4.53125, | |
| "logits/rejected": -2.703125, | |
| "logps/chosen": -71.875, | |
| "logps/rejected": -656.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.59375, | |
| "rewards/margins": 44.625, | |
| "rewards/rejected": -39.0, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.0801335559265441, | |
| "grad_norm": 2.6781217457028106e-06, | |
| "learning_rate": 0.00011594131516940455, | |
| "logits/chosen": -4.46875, | |
| "logits/rejected": -2.6875, | |
| "logps/chosen": -88.0, | |
| "logps/rejected": -562.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.140625, | |
| "rewards/margins": 38.5, | |
| "rewards/rejected": -33.375, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.0834724540901504, | |
| "grad_norm": 4.066099279498303e-07, | |
| "learning_rate": 0.00011568880485419107, | |
| "logits/chosen": -4.921875, | |
| "logits/rejected": -2.75, | |
| "logps/chosen": -74.0, | |
| "logps/rejected": -572.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.703125, | |
| "rewards/margins": 41.375, | |
| "rewards/rejected": -35.75, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.0868113522537564, | |
| "grad_norm": 9.277933088469581e-09, | |
| "learning_rate": 0.00011543584985520065, | |
| "logits/chosen": -4.359375, | |
| "logits/rejected": -2.90625, | |
| "logps/chosen": -86.0, | |
| "logps/rejected": -602.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.765625, | |
| "rewards/margins": 41.0, | |
| "rewards/rejected": -36.25, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.0901502504173624, | |
| "grad_norm": 5.080758036513089e-09, | |
| "learning_rate": 0.00011518245332426155, | |
| "logits/chosen": -3.8828125, | |
| "logits/rejected": -2.7578125, | |
| "logps/chosen": -111.0, | |
| "logps/rejected": -662.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.3125, | |
| "rewards/margins": 43.875, | |
| "rewards/rejected": -38.5, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.0934891485809684, | |
| "grad_norm": 9.527740729708967e-08, | |
| "learning_rate": 0.00011492861841870358, | |
| "logits/chosen": -4.2109375, | |
| "logits/rejected": -2.671875, | |
| "logps/chosen": -108.75, | |
| "logps/rejected": -582.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.65625, | |
| "rewards/margins": 41.875, | |
| "rewards/rejected": -36.25, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.0968280467445743, | |
| "grad_norm": 1.594116305625448e-08, | |
| "learning_rate": 0.00011467434830131869, | |
| "logits/chosen": -4.84375, | |
| "logits/rejected": -2.59375, | |
| "logps/chosen": -70.5, | |
| "logps/rejected": -666.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.40625, | |
| "rewards/margins": 44.75, | |
| "rewards/rejected": -39.375, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.1001669449081803, | |
| "grad_norm": 3.618521532189334e-06, | |
| "learning_rate": 0.00011441964614032151, | |
| "logits/chosen": -4.1875, | |
| "logits/rejected": -2.7890625, | |
| "logps/chosen": -101.75, | |
| "logps/rejected": -572.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.53125, | |
| "rewards/margins": 39.25, | |
| "rewards/rejected": -33.8125, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.1035058430717863, | |
| "grad_norm": 2.0210993625369156e-06, | |
| "learning_rate": 0.00011416451510931009, | |
| "logits/chosen": -4.140625, | |
| "logits/rejected": -2.6171875, | |
| "logps/chosen": -98.0, | |
| "logps/rejected": -553.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.03125, | |
| "rewards/margins": 38.625, | |
| "rewards/rejected": -33.5625, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.1068447412353923, | |
| "grad_norm": 1.149137052181004e-08, | |
| "learning_rate": 0.00011390895838722613, | |
| "logits/chosen": -4.171875, | |
| "logits/rejected": -2.578125, | |
| "logps/chosen": -115.0, | |
| "logps/rejected": -566.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.015625, | |
| "rewards/margins": 39.625, | |
| "rewards/rejected": -34.625, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.1101836393989983, | |
| "grad_norm": 6.3274292472215166e-09, | |
| "learning_rate": 0.00011365297915831545, | |
| "logits/chosen": -4.328125, | |
| "logits/rejected": -2.53125, | |
| "logps/chosen": -108.75, | |
| "logps/rejected": -704.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.03125, | |
| "rewards/margins": 45.75, | |
| "rewards/rejected": -39.75, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.1135225375626043, | |
| "grad_norm": 5.55765291210264e-06, | |
| "learning_rate": 0.00011339658061208833, | |
| "logits/chosen": -4.28125, | |
| "logits/rejected": -2.7109375, | |
| "logps/chosen": -105.5, | |
| "logps/rejected": -616.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.359375, | |
| "rewards/margins": 42.875, | |
| "rewards/rejected": -36.625, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.1168614357262103, | |
| "grad_norm": 0.001700929249636829, | |
| "learning_rate": 0.0001131397659432798, | |
| "logits/chosen": -4.4375, | |
| "logits/rejected": -2.78125, | |
| "logps/chosen": -102.5, | |
| "logps/rejected": -564.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.578125, | |
| "rewards/margins": 39.625, | |
| "rewards/rejected": -34.125, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.1202003338898163, | |
| "grad_norm": 6.786502240174741e-07, | |
| "learning_rate": 0.00011288253835180975, | |
| "logits/chosen": -4.453125, | |
| "logits/rejected": -2.8984375, | |
| "logps/chosen": -91.5, | |
| "logps/rejected": -548.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.21875, | |
| "rewards/margins": 37.75, | |
| "rewards/rejected": -32.5, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.1235392320534223, | |
| "grad_norm": 1.9758540759085008e-07, | |
| "learning_rate": 0.00011262490104274313, | |
| "logits/chosen": -3.96875, | |
| "logits/rejected": -2.6640625, | |
| "logps/chosen": -100.25, | |
| "logps/rejected": -580.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.546875, | |
| "rewards/margins": 40.25, | |
| "rewards/rejected": -34.6875, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.1268781302170283, | |
| "grad_norm": 2.381622380198678e-06, | |
| "learning_rate": 0.00011236685722624995, | |
| "logits/chosen": -4.71875, | |
| "logits/rejected": -2.734375, | |
| "logps/chosen": -92.75, | |
| "logps/rejected": -592.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.171875, | |
| "rewards/margins": 41.5, | |
| "rewards/rejected": -36.375, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.1302170283806343, | |
| "grad_norm": 1.006454340313212e-06, | |
| "learning_rate": 0.0001121084101175653, | |
| "logits/chosen": -4.296875, | |
| "logits/rejected": -2.6015625, | |
| "logps/chosen": -77.625, | |
| "logps/rejected": -644.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.609375, | |
| "rewards/margins": 43.75, | |
| "rewards/rejected": -38.125, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.1335559265442403, | |
| "grad_norm": 2.7868235719097356e-08, | |
| "learning_rate": 0.00011184956293694941, | |
| "logits/chosen": -3.8125, | |
| "logits/rejected": -2.8046875, | |
| "logps/chosen": -107.0, | |
| "logps/rejected": -586.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.15625, | |
| "rewards/margins": 41.0, | |
| "rewards/rejected": -35.875, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.1368948247078463, | |
| "grad_norm": 5.081914196125581e-07, | |
| "learning_rate": 0.00011159031890964731, | |
| "logits/chosen": -4.890625, | |
| "logits/rejected": -2.703125, | |
| "logps/chosen": -92.0, | |
| "logps/rejected": -568.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.640625, | |
| "rewards/margins": 40.75, | |
| "rewards/rejected": -35.125, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.1402337228714523, | |
| "grad_norm": 3.7901945688645355e-06, | |
| "learning_rate": 0.00011133068126584881, | |
| "logits/chosen": -4.53125, | |
| "logits/rejected": -2.7265625, | |
| "logps/chosen": -76.5, | |
| "logps/rejected": -576.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.21875, | |
| "rewards/margins": 41.875, | |
| "rewards/rejected": -36.625, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.1435726210350585, | |
| "grad_norm": 1.784697616358244e-07, | |
| "learning_rate": 0.00011107065324064816, | |
| "logits/chosen": -3.90625, | |
| "logits/rejected": -2.625, | |
| "logps/chosen": -103.0, | |
| "logps/rejected": -638.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.078125, | |
| "rewards/margins": 41.625, | |
| "rewards/rejected": -36.5, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.1469115191986645, | |
| "grad_norm": 1.5315407608795795e-06, | |
| "learning_rate": 0.00011081023807400378, | |
| "logits/chosen": -4.359375, | |
| "logits/rejected": -2.703125, | |
| "logps/chosen": -91.0, | |
| "logps/rejected": -568.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.859375, | |
| "rewards/margins": 40.125, | |
| "rewards/rejected": -34.375, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.1502504173622705, | |
| "grad_norm": 1.3057597669785537e-08, | |
| "learning_rate": 0.000110549439010698, | |
| "logits/chosen": -4.390625, | |
| "logits/rejected": -2.6640625, | |
| "logps/chosen": -88.5, | |
| "logps/rejected": -674.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.0625, | |
| "rewards/margins": 45.0, | |
| "rewards/rejected": -40.0, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.1535893155258765, | |
| "grad_norm": 9.364051578586441e-08, | |
| "learning_rate": 0.0001102882593002963, | |
| "logits/chosen": -4.1015625, | |
| "logits/rejected": -2.6328125, | |
| "logps/chosen": -105.25, | |
| "logps/rejected": -604.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.109375, | |
| "rewards/margins": 41.875, | |
| "rewards/rejected": -36.625, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.1569282136894825, | |
| "grad_norm": 2.3089357092942464e-09, | |
| "learning_rate": 0.00011002670219710718, | |
| "logits/chosen": -4.25, | |
| "logits/rejected": -2.703125, | |
| "logps/chosen": -85.5, | |
| "logps/rejected": -644.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.890625, | |
| "rewards/margins": 45.75, | |
| "rewards/rejected": -39.75, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.1602671118530885, | |
| "grad_norm": 1.3028484318056144e-06, | |
| "learning_rate": 0.0001097647709601415, | |
| "logits/chosen": -4.203125, | |
| "logits/rejected": -2.7265625, | |
| "logps/chosen": -108.75, | |
| "logps/rejected": -608.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.0625, | |
| "rewards/margins": 41.75, | |
| "rewards/rejected": -35.75, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.1636060100166945, | |
| "grad_norm": 4.899620709153396e-09, | |
| "learning_rate": 0.00010950246885307172, | |
| "logits/chosen": -4.453125, | |
| "logits/rejected": -2.625, | |
| "logps/chosen": -81.0, | |
| "logps/rejected": -632.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.125, | |
| "rewards/margins": 42.625, | |
| "rewards/rejected": -37.5, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.1669449081803005, | |
| "grad_norm": 2.0634535857766423e-08, | |
| "learning_rate": 0.00010923979914419147, | |
| "logits/chosen": -4.390625, | |
| "logits/rejected": -2.7734375, | |
| "logps/chosen": -96.5, | |
| "logps/rejected": -620.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.390625, | |
| "rewards/margins": 42.25, | |
| "rewards/rejected": -37.0, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.1702838063439065, | |
| "grad_norm": 1.040544761465867e-09, | |
| "learning_rate": 0.00010897676510637461, | |
| "logits/chosen": -4.34375, | |
| "logits/rejected": -2.828125, | |
| "logps/chosen": -98.0, | |
| "logps/rejected": -644.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.734375, | |
| "rewards/margins": 44.125, | |
| "rewards/rejected": -38.375, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.1736227045075125, | |
| "grad_norm": 3.300164053143817e-07, | |
| "learning_rate": 0.00010871337001703463, | |
| "logits/chosen": -4.6875, | |
| "logits/rejected": -2.71875, | |
| "logps/chosen": -88.25, | |
| "logps/rejected": -636.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.03125, | |
| "rewards/margins": 43.875, | |
| "rewards/rejected": -38.75, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.1769616026711185, | |
| "grad_norm": 0.002110698726028204, | |
| "learning_rate": 0.00010844961715808369, | |
| "logits/chosen": -4.65625, | |
| "logits/rejected": -2.484375, | |
| "logps/chosen": -78.25, | |
| "logps/rejected": -672.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.5625, | |
| "rewards/margins": 44.5, | |
| "rewards/rejected": -38.875, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.1803005008347245, | |
| "grad_norm": 3.753486854662924e-09, | |
| "learning_rate": 0.00010818550981589181, | |
| "logits/chosen": -4.6875, | |
| "logits/rejected": -2.671875, | |
| "logps/chosen": -75.5, | |
| "logps/rejected": -656.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.484375, | |
| "rewards/margins": 46.5, | |
| "rewards/rejected": -41.125, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.1836393989983305, | |
| "grad_norm": 7.482569230887748e-08, | |
| "learning_rate": 0.00010792105128124584, | |
| "logits/chosen": -4.75, | |
| "logits/rejected": -2.6328125, | |
| "logps/chosen": -69.25, | |
| "logps/rejected": -636.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.0, | |
| "rewards/margins": 43.375, | |
| "rewards/rejected": -38.5, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.1869782971619365, | |
| "grad_norm": 4.6094406069840943e-10, | |
| "learning_rate": 0.00010765624484930851, | |
| "logits/chosen": -4.1875, | |
| "logits/rejected": -2.8046875, | |
| "logps/chosen": -95.0, | |
| "logps/rejected": -644.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.296875, | |
| "rewards/margins": 43.5, | |
| "rewards/rejected": -38.125, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.1903171953255425, | |
| "grad_norm": 7.443770932979987e-09, | |
| "learning_rate": 0.00010739109381957741, | |
| "logits/chosen": -3.671875, | |
| "logits/rejected": -2.609375, | |
| "logps/chosen": -109.25, | |
| "logps/rejected": -600.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.859375, | |
| "rewards/margins": 41.25, | |
| "rewards/rejected": -35.375, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.1936560934891487, | |
| "grad_norm": 2.510856802473427e-07, | |
| "learning_rate": 0.00010712560149584376, | |
| "logits/chosen": -3.71875, | |
| "logits/rejected": -2.7578125, | |
| "logps/chosen": -118.75, | |
| "logps/rejected": -552.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.15625, | |
| "rewards/margins": 38.75, | |
| "rewards/rejected": -32.625, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.1969949916527547, | |
| "grad_norm": 6.122155582488631e-08, | |
| "learning_rate": 0.00010685977118615136, | |
| "logits/chosen": -4.875, | |
| "logits/rejected": -2.6953125, | |
| "logps/chosen": -59.75, | |
| "logps/rejected": -688.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.859375, | |
| "rewards/margins": 46.375, | |
| "rewards/rejected": -41.5, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.2003338898163607, | |
| "grad_norm": 5.218083742875024e-08, | |
| "learning_rate": 0.00010659360620275533, | |
| "logits/chosen": -4.640625, | |
| "logits/rejected": -2.640625, | |
| "logps/chosen": -79.25, | |
| "logps/rejected": -618.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.328125, | |
| "rewards/margins": 42.625, | |
| "rewards/rejected": -37.25, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.2036727879799667, | |
| "grad_norm": 8.644967238069512e-06, | |
| "learning_rate": 0.0001063271098620808, | |
| "logits/chosen": -4.2421875, | |
| "logits/rejected": -2.6796875, | |
| "logps/chosen": -92.25, | |
| "logps/rejected": -610.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.71875, | |
| "rewards/margins": 42.625, | |
| "rewards/rejected": -36.875, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.2070116861435727, | |
| "grad_norm": 3.447842118475819e-07, | |
| "learning_rate": 0.00010606028548468168, | |
| "logits/chosen": -4.40625, | |
| "logits/rejected": -2.578125, | |
| "logps/chosen": -87.0, | |
| "logps/rejected": -584.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.375, | |
| "rewards/margins": 40.875, | |
| "rewards/rejected": -35.5, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.2103505843071787, | |
| "grad_norm": 1.8150290159724136e-08, | |
| "learning_rate": 0.00010579313639519917, | |
| "logits/chosen": -4.703125, | |
| "logits/rejected": -2.7578125, | |
| "logps/chosen": -82.25, | |
| "logps/rejected": -634.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.3125, | |
| "rewards/margins": 43.0, | |
| "rewards/rejected": -37.75, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.2136894824707847, | |
| "grad_norm": 1.4830611944198608, | |
| "learning_rate": 0.00010552566592232041, | |
| "logits/chosen": -4.46875, | |
| "logits/rejected": -2.671875, | |
| "logps/chosen": -90.25, | |
| "logps/rejected": -616.0, | |
| "loss": 0.0253, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 4.59375, | |
| "rewards/margins": 41.25, | |
| "rewards/rejected": -36.625, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.2170283806343907, | |
| "grad_norm": 1.2266583837572398e-07, | |
| "learning_rate": 0.00010525787739873704, | |
| "logits/chosen": -4.5625, | |
| "logits/rejected": -2.5859375, | |
| "logps/chosen": -87.0, | |
| "logps/rejected": -590.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.6875, | |
| "rewards/margins": 40.75, | |
| "rewards/rejected": -35.0, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.2203672787979967, | |
| "grad_norm": 1.2345830327831209e-05, | |
| "learning_rate": 0.00010498977416110356, | |
| "logits/chosen": -4.1640625, | |
| "logits/rejected": -2.5546875, | |
| "logps/chosen": -98.0, | |
| "logps/rejected": -624.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.0625, | |
| "rewards/margins": 44.125, | |
| "rewards/rejected": -38.0, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.2237061769616027, | |
| "grad_norm": 3.0638256376391837e-09, | |
| "learning_rate": 0.0001047213595499958, | |
| "logits/chosen": -4.5625, | |
| "logits/rejected": -2.4453125, | |
| "logps/chosen": -82.5, | |
| "logps/rejected": -624.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.296875, | |
| "rewards/margins": 45.375, | |
| "rewards/rejected": -39.25, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.2270450751252087, | |
| "grad_norm": 0.10086002945899963, | |
| "learning_rate": 0.0001044526369098694, | |
| "logits/chosen": -3.921875, | |
| "logits/rejected": -2.6484375, | |
| "logps/chosen": -111.75, | |
| "logps/rejected": -642.0, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.015625, | |
| "rewards/margins": 45.75, | |
| "rewards/rejected": -39.625, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.2303839732888147, | |
| "grad_norm": 7.665768020359565e-10, | |
| "learning_rate": 0.00010418360958901803, | |
| "logits/chosen": -4.296875, | |
| "logits/rejected": -2.6875, | |
| "logps/chosen": -92.0, | |
| "logps/rejected": -650.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.96875, | |
| "rewards/margins": 45.5, | |
| "rewards/rejected": -39.5, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.2337228714524207, | |
| "grad_norm": 3.2425892526610767e-10, | |
| "learning_rate": 0.00010391428093953162, | |
| "logits/chosen": -4.140625, | |
| "logits/rejected": -2.5390625, | |
| "logps/chosen": -85.75, | |
| "logps/rejected": -696.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.140625, | |
| "rewards/margins": 49.375, | |
| "rewards/rejected": -43.25, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.2370617696160267, | |
| "grad_norm": 1.0531423413340235e-06, | |
| "learning_rate": 0.00010364465431725476, | |
| "logits/chosen": -4.0078125, | |
| "logits/rejected": -2.515625, | |
| "logps/chosen": -89.75, | |
| "logps/rejected": -628.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.84375, | |
| "rewards/margins": 44.375, | |
| "rewards/rejected": -38.625, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.2404006677796326, | |
| "grad_norm": 6.026751719900858e-08, | |
| "learning_rate": 0.00010337473308174466, | |
| "logits/chosen": -4.171875, | |
| "logits/rejected": -2.5625, | |
| "logps/chosen": -79.25, | |
| "logps/rejected": -672.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.34375, | |
| "rewards/margins": 48.125, | |
| "rewards/rejected": -42.75, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.2437395659432386, | |
| "grad_norm": 0.011823623441159725, | |
| "learning_rate": 0.0001031045205962296, | |
| "logits/chosen": -4.609375, | |
| "logits/rejected": -2.5625, | |
| "logps/chosen": -76.75, | |
| "logps/rejected": -620.0, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.375, | |
| "rewards/margins": 43.875, | |
| "rewards/rejected": -38.5, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.2470784641068446, | |
| "grad_norm": 7.656896923435852e-05, | |
| "learning_rate": 0.00010283402022756673, | |
| "logits/chosen": -4.28125, | |
| "logits/rejected": -2.6171875, | |
| "logps/chosen": -84.0, | |
| "logps/rejected": -656.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.375, | |
| "rewards/margins": 48.0, | |
| "rewards/rejected": -41.625, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.2504173622704506, | |
| "grad_norm": 6.547962616565428e-09, | |
| "learning_rate": 0.00010256323534620024, | |
| "logits/chosen": -4.171875, | |
| "logits/rejected": -2.3203125, | |
| "logps/chosen": -105.5, | |
| "logps/rejected": -690.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.09375, | |
| "rewards/margins": 49.0, | |
| "rewards/rejected": -42.875, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.2537562604340566, | |
| "grad_norm": 2.6672342556821604e-08, | |
| "learning_rate": 0.00010229216932611939, | |
| "logits/chosen": -4.28125, | |
| "logits/rejected": -2.640625, | |
| "logps/chosen": -79.5, | |
| "logps/rejected": -646.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.640625, | |
| "rewards/margins": 46.75, | |
| "rewards/rejected": -41.125, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.2570951585976629, | |
| "grad_norm": 1.1736976723852877e-08, | |
| "learning_rate": 0.0001020208255448164, | |
| "logits/chosen": -4.453125, | |
| "logits/rejected": -2.46875, | |
| "logps/chosen": -79.5, | |
| "logps/rejected": -652.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.9375, | |
| "rewards/margins": 47.375, | |
| "rewards/rejected": -41.375, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.2604340567612689, | |
| "grad_norm": 6.4667449173327896e-09, | |
| "learning_rate": 0.00010174920738324448, | |
| "logits/chosen": -4.0703125, | |
| "logits/rejected": -2.4765625, | |
| "logps/chosen": -102.75, | |
| "logps/rejected": -616.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.296875, | |
| "rewards/margins": 46.75, | |
| "rewards/rejected": -40.5, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.2637729549248748, | |
| "grad_norm": 2.800082654630387e-07, | |
| "learning_rate": 0.00010147731822577554, | |
| "logits/chosen": -4.46875, | |
| "logits/rejected": -2.484375, | |
| "logps/chosen": -65.875, | |
| "logps/rejected": -662.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.890625, | |
| "rewards/margins": 48.5, | |
| "rewards/rejected": -42.5, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.2671118530884808, | |
| "grad_norm": 5.510163436639459e-09, | |
| "learning_rate": 0.00010120516146015814, | |
| "logits/chosen": -4.046875, | |
| "logits/rejected": -2.3671875, | |
| "logps/chosen": -112.5, | |
| "logps/rejected": -726.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.03125, | |
| "rewards/margins": 50.25, | |
| "rewards/rejected": -44.125, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.2704507512520868, | |
| "grad_norm": 5.70578917447051e-10, | |
| "learning_rate": 0.00010093274047747526, | |
| "logits/chosen": -4.34375, | |
| "logits/rejected": -2.484375, | |
| "logps/chosen": -85.5, | |
| "logps/rejected": -646.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.953125, | |
| "rewards/margins": 46.625, | |
| "rewards/rejected": -40.75, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.2737896494156928, | |
| "grad_norm": 2.0008934509263554e-09, | |
| "learning_rate": 0.00010066005867210204, | |
| "logits/chosen": -4.375, | |
| "logits/rejected": -2.4453125, | |
| "logps/chosen": -77.5, | |
| "logps/rejected": -648.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.875, | |
| "rewards/margins": 47.5, | |
| "rewards/rejected": -41.625, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.2771285475792988, | |
| "grad_norm": 3.104088364125346e-08, | |
| "learning_rate": 0.00010038711944166345, | |
| "logits/chosen": -4.515625, | |
| "logits/rejected": -2.5078125, | |
| "logps/chosen": -72.5, | |
| "logps/rejected": -622.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.828125, | |
| "rewards/margins": 46.125, | |
| "rewards/rejected": -40.25, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.2804674457429048, | |
| "grad_norm": 4.240581347403349e-06, | |
| "learning_rate": 0.00010011392618699203, | |
| "logits/chosen": -4.828125, | |
| "logits/rejected": -2.421875, | |
| "logps/chosen": -66.125, | |
| "logps/rejected": -696.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.03125, | |
| "rewards/margins": 48.75, | |
| "rewards/rejected": -42.75, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.2838063439065108, | |
| "grad_norm": 1.7224666137849454e-11, | |
| "learning_rate": 9.984048231208542e-05, | |
| "logits/chosen": -4.78125, | |
| "logits/rejected": -2.4765625, | |
| "logps/chosen": -66.125, | |
| "logps/rejected": -704.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.984375, | |
| "rewards/margins": 50.375, | |
| "rewards/rejected": -44.375, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.2871452420701168, | |
| "grad_norm": 1.3602375226184904e-08, | |
| "learning_rate": 9.956679122406401e-05, | |
| "logits/chosen": -4.6875, | |
| "logits/rejected": -2.359375, | |
| "logps/chosen": -77.25, | |
| "logps/rejected": -714.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.5625, | |
| "rewards/margins": 50.0, | |
| "rewards/rejected": -44.625, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.2904841402337228, | |
| "grad_norm": 2.335872886760626e-05, | |
| "learning_rate": 9.92928563331285e-05, | |
| "logits/chosen": -4.03125, | |
| "logits/rejected": -2.4453125, | |
| "logps/chosen": -96.75, | |
| "logps/rejected": -648.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.84375, | |
| "rewards/margins": 46.5, | |
| "rewards/rejected": -40.625, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.2938230383973288, | |
| "grad_norm": 1.2561605444716406e-06, | |
| "learning_rate": 9.901868105251735e-05, | |
| "logits/chosen": -4.203125, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -70.5, | |
| "logps/rejected": -648.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.46875, | |
| "rewards/margins": 46.375, | |
| "rewards/rejected": -40.875, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.2971619365609348, | |
| "grad_norm": 3.965501704783492e-09, | |
| "learning_rate": 9.874426879846435e-05, | |
| "logits/chosen": -4.0625, | |
| "logits/rejected": -2.3515625, | |
| "logps/chosen": -101.75, | |
| "logps/rejected": -640.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.203125, | |
| "rewards/margins": 46.75, | |
| "rewards/rejected": -40.5, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.300500834724541, | |
| "grad_norm": 7.816532132665088e-08, | |
| "learning_rate": 9.846962299015589e-05, | |
| "logits/chosen": -4.09375, | |
| "logits/rejected": -2.5078125, | |
| "logps/chosen": -95.0, | |
| "logps/rejected": -704.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.78125, | |
| "rewards/margins": 50.25, | |
| "rewards/rejected": -44.5, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.303839732888147, | |
| "grad_norm": 5.069846320537863e-09, | |
| "learning_rate": 9.819474704968854e-05, | |
| "logits/chosen": -4.671875, | |
| "logits/rejected": -2.390625, | |
| "logps/chosen": -81.25, | |
| "logps/rejected": -716.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.6875, | |
| "rewards/margins": 50.5, | |
| "rewards/rejected": -44.75, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.307178631051753, | |
| "grad_norm": 1.4890460988681298e-06, | |
| "learning_rate": 9.791964440202621e-05, | |
| "logits/chosen": -4.03125, | |
| "logits/rejected": -2.234375, | |
| "logps/chosen": -81.25, | |
| "logps/rejected": -746.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.71875, | |
| "rewards/margins": 50.25, | |
| "rewards/rejected": -44.5, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.310517529215359, | |
| "grad_norm": 6.790008200141529e-08, | |
| "learning_rate": 9.764431847495775e-05, | |
| "logits/chosen": -3.578125, | |
| "logits/rejected": -2.171875, | |
| "logps/chosen": -107.75, | |
| "logps/rejected": -750.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.59375, | |
| "rewards/margins": 51.125, | |
| "rewards/rejected": -45.5, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.313856427378965, | |
| "grad_norm": 1.523936225567013e-05, | |
| "learning_rate": 9.7368772699054e-05, | |
| "logits/chosen": -4.78125, | |
| "logits/rejected": -2.375, | |
| "logps/chosen": -77.25, | |
| "logps/rejected": -652.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.453125, | |
| "rewards/margins": 46.0, | |
| "rewards/rejected": -40.625, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.317195325542571, | |
| "grad_norm": 0.0042861769907176495, | |
| "learning_rate": 9.709301050762508e-05, | |
| "logits/chosen": -4.203125, | |
| "logits/rejected": -2.3125, | |
| "logps/chosen": -86.75, | |
| "logps/rejected": -686.0, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.359375, | |
| "rewards/margins": 48.5, | |
| "rewards/rejected": -42.125, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.320534223706177, | |
| "grad_norm": 3.386242610758927e-07, | |
| "learning_rate": 9.681703533667771e-05, | |
| "logits/chosen": -4.375, | |
| "logits/rejected": -2.4765625, | |
| "logps/chosen": -84.5, | |
| "logps/rejected": -660.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.28125, | |
| "rewards/margins": 47.5, | |
| "rewards/rejected": -42.125, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.323873121869783, | |
| "grad_norm": 4.3367620605749835e-07, | |
| "learning_rate": 9.65408506248723e-05, | |
| "logits/chosen": -4.265625, | |
| "logits/rejected": -2.375, | |
| "logps/chosen": -96.5, | |
| "logps/rejected": -666.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.078125, | |
| "rewards/margins": 48.375, | |
| "rewards/rejected": -42.375, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.327212020033389, | |
| "grad_norm": 1.0023908370015988e-09, | |
| "learning_rate": 9.626445981348023e-05, | |
| "logits/chosen": -4.09375, | |
| "logits/rejected": -2.3125, | |
| "logps/chosen": -83.0, | |
| "logps/rejected": -670.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.796875, | |
| "rewards/margins": 48.625, | |
| "rewards/rejected": -42.875, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.330550918196995, | |
| "grad_norm": 3.0025191222193826e-08, | |
| "learning_rate": 9.598786634634082e-05, | |
| "logits/chosen": -3.8125, | |
| "logits/rejected": -2.375, | |
| "logps/chosen": -101.5, | |
| "logps/rejected": -626.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.546875, | |
| "rewards/margins": 46.25, | |
| "rewards/rejected": -40.75, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.333889816360601, | |
| "grad_norm": 7.900404774829894e-09, | |
| "learning_rate": 9.571107366981845e-05, | |
| "logits/chosen": -4.71875, | |
| "logits/rejected": -2.53125, | |
| "logps/chosen": -64.375, | |
| "logps/rejected": -598.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.734375, | |
| "rewards/margins": 44.75, | |
| "rewards/rejected": -39.0, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.337228714524207, | |
| "grad_norm": 0.001412657555192709, | |
| "learning_rate": 9.543408523275976e-05, | |
| "logits/chosen": -4.421875, | |
| "logits/rejected": -2.5078125, | |
| "logps/chosen": -79.25, | |
| "logps/rejected": -608.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.765625, | |
| "rewards/margins": 44.5, | |
| "rewards/rejected": -38.625, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.340567612687813, | |
| "grad_norm": 3.320309076570993e-07, | |
| "learning_rate": 9.51569044864505e-05, | |
| "logits/chosen": -4.609375, | |
| "logits/rejected": -2.375, | |
| "logps/chosen": -78.5, | |
| "logps/rejected": -740.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.015625, | |
| "rewards/margins": 53.125, | |
| "rewards/rejected": -47.125, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.343906510851419, | |
| "grad_norm": 5.0411297358721185e-09, | |
| "learning_rate": 9.487953488457264e-05, | |
| "logits/chosen": -4.2890625, | |
| "logits/rejected": -2.46875, | |
| "logps/chosen": -86.875, | |
| "logps/rejected": -702.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.40625, | |
| "rewards/margins": 48.25, | |
| "rewards/rejected": -42.75, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.347245409015025, | |
| "grad_norm": 8.494340875131456e-08, | |
| "learning_rate": 9.460197988316126e-05, | |
| "logits/chosen": -4.265625, | |
| "logits/rejected": -2.5, | |
| "logps/chosen": -87.75, | |
| "logps/rejected": -650.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.78125, | |
| "rewards/margins": 47.0, | |
| "rewards/rejected": -41.125, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.350584307178631, | |
| "grad_norm": 5.050285835750401e-06, | |
| "learning_rate": 9.432424294056155e-05, | |
| "logits/chosen": -4.90625, | |
| "logits/rejected": -2.359375, | |
| "logps/chosen": -59.75, | |
| "logps/rejected": -740.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.703125, | |
| "rewards/margins": 52.125, | |
| "rewards/rejected": -46.375, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.353923205342237, | |
| "grad_norm": 9.864572803053306e-07, | |
| "learning_rate": 9.404632751738566e-05, | |
| "logits/chosen": -3.7890625, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -78.75, | |
| "logps/rejected": -734.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.796875, | |
| "rewards/margins": 50.25, | |
| "rewards/rejected": -45.5, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.357262103505843, | |
| "grad_norm": 3.688290962600149e-05, | |
| "learning_rate": 9.376823707646968e-05, | |
| "logits/chosen": -4.328125, | |
| "logits/rejected": -2.5703125, | |
| "logps/chosen": -99.25, | |
| "logps/rejected": -598.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.5, | |
| "rewards/margins": 43.875, | |
| "rewards/rejected": -38.375, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.360601001669449, | |
| "grad_norm": 3.8295453919090505e-08, | |
| "learning_rate": 9.348997508283038e-05, | |
| "logits/chosen": -4.15625, | |
| "logits/rejected": -2.2578125, | |
| "logps/chosen": -88.5, | |
| "logps/rejected": -680.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.6875, | |
| "rewards/margins": 47.875, | |
| "rewards/rejected": -42.25, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.363939899833055, | |
| "grad_norm": 1.983067576816211e-08, | |
| "learning_rate": 9.321154500362208e-05, | |
| "logits/chosen": -4.546875, | |
| "logits/rejected": -2.375, | |
| "logps/chosen": -80.5, | |
| "logps/rejected": -708.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.90625, | |
| "rewards/margins": 50.25, | |
| "rewards/rejected": -44.375, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.367278797996661, | |
| "grad_norm": 5.22494865151657e-08, | |
| "learning_rate": 9.293295030809347e-05, | |
| "logits/chosen": -4.140625, | |
| "logits/rejected": -2.5546875, | |
| "logps/chosen": -94.75, | |
| "logps/rejected": -656.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.703125, | |
| "rewards/margins": 46.5, | |
| "rewards/rejected": -40.875, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.3706176961602672, | |
| "grad_norm": 7.317971153497638e-07, | |
| "learning_rate": 9.265419446754433e-05, | |
| "logits/chosen": -4.1875, | |
| "logits/rejected": -2.5234375, | |
| "logps/chosen": -81.75, | |
| "logps/rejected": -624.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.546875, | |
| "rewards/margins": 47.0, | |
| "rewards/rejected": -41.375, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.3739565943238732, | |
| "grad_norm": 5.5238370322285846e-08, | |
| "learning_rate": 9.237528095528238e-05, | |
| "logits/chosen": -4.46875, | |
| "logits/rejected": -2.3671875, | |
| "logps/chosen": -86.25, | |
| "logps/rejected": -618.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.6875, | |
| "rewards/margins": 43.75, | |
| "rewards/rejected": -38.0, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.3772954924874792, | |
| "grad_norm": 1.3657646125153633e-09, | |
| "learning_rate": 9.209621324657987e-05, | |
| "logits/chosen": -4.71875, | |
| "logits/rejected": -2.4921875, | |
| "logps/chosen": -74.75, | |
| "logps/rejected": -622.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.9375, | |
| "rewards/margins": 45.75, | |
| "rewards/rejected": -39.875, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.3806343906510852, | |
| "grad_norm": 6.379238470799464e-07, | |
| "learning_rate": 9.181699481863039e-05, | |
| "logits/chosen": -4.1875, | |
| "logits/rejected": -2.3046875, | |
| "logps/chosen": -80.25, | |
| "logps/rejected": -732.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.0625, | |
| "rewards/margins": 50.5, | |
| "rewards/rejected": -45.5, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.3839732888146912, | |
| "grad_norm": 3.687435423671559e-08, | |
| "learning_rate": 9.153762915050547e-05, | |
| "logits/chosen": -4.375, | |
| "logits/rejected": -2.4140625, | |
| "logps/chosen": -90.0, | |
| "logps/rejected": -648.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.875, | |
| "rewards/margins": 49.125, | |
| "rewards/rejected": -43.375, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.3873121869782972, | |
| "grad_norm": 1.0608222282826318e-06, | |
| "learning_rate": 9.125811972311125e-05, | |
| "logits/chosen": -4.140625, | |
| "logits/rejected": -2.4609375, | |
| "logps/chosen": -94.0, | |
| "logps/rejected": -628.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.21875, | |
| "rewards/margins": 45.875, | |
| "rewards/rejected": -39.625, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.3906510851419032, | |
| "grad_norm": 1.4249868840465751e-08, | |
| "learning_rate": 9.097847001914515e-05, | |
| "logits/chosen": -3.921875, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -104.0, | |
| "logps/rejected": -658.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.734375, | |
| "rewards/margins": 47.375, | |
| "rewards/rejected": -41.625, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.3939899833055092, | |
| "grad_norm": 2.5101855953835184e-06, | |
| "learning_rate": 9.069868352305241e-05, | |
| "logits/chosen": -4.15625, | |
| "logits/rejected": -2.4140625, | |
| "logps/chosen": -96.25, | |
| "logps/rejected": -658.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.265625, | |
| "rewards/margins": 47.875, | |
| "rewards/rejected": -41.625, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.3973288814691152, | |
| "grad_norm": 1.0253067728172027e-08, | |
| "learning_rate": 9.041876372098271e-05, | |
| "logits/chosen": -4.21875, | |
| "logits/rejected": -2.4140625, | |
| "logps/chosen": -95.25, | |
| "logps/rejected": -700.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.546875, | |
| "rewards/margins": 49.625, | |
| "rewards/rejected": -43.0, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.4006677796327212, | |
| "grad_norm": 0.41543394327163696, | |
| "learning_rate": 9.013871410074674e-05, | |
| "logits/chosen": -4.5625, | |
| "logits/rejected": -2.359375, | |
| "logps/chosen": -86.5, | |
| "logps/rejected": -650.0, | |
| "loss": 0.1123, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 6.6875, | |
| "rewards/margins": 46.0, | |
| "rewards/rejected": -39.25, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.4040066777963272, | |
| "grad_norm": 0.0006037292769178748, | |
| "learning_rate": 8.985853815177269e-05, | |
| "logits/chosen": -4.265625, | |
| "logits/rejected": -2.34375, | |
| "logps/chosen": -97.25, | |
| "logps/rejected": -626.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.09375, | |
| "rewards/margins": 46.125, | |
| "rewards/rejected": -40.125, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.4073455759599331, | |
| "grad_norm": 4.558990129854834e-10, | |
| "learning_rate": 8.957823936506292e-05, | |
| "logits/chosen": -4.0546875, | |
| "logits/rejected": -2.390625, | |
| "logps/chosen": -94.75, | |
| "logps/rejected": -668.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.609375, | |
| "rewards/margins": 50.25, | |
| "rewards/rejected": -44.625, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.4106844741235394, | |
| "grad_norm": 1.3576600395026617e-05, | |
| "learning_rate": 8.929782123315022e-05, | |
| "logits/chosen": -3.6953125, | |
| "logits/rejected": -2.1953125, | |
| "logps/chosen": -106.25, | |
| "logps/rejected": -712.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.375, | |
| "rewards/margins": 49.75, | |
| "rewards/rejected": -44.375, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.4140233722871454, | |
| "grad_norm": 1.5348656789981874e-09, | |
| "learning_rate": 8.901728725005449e-05, | |
| "logits/chosen": -4.1875, | |
| "logits/rejected": -2.3515625, | |
| "logps/chosen": -106.25, | |
| "logps/rejected": -714.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.0, | |
| "rewards/margins": 49.375, | |
| "rewards/rejected": -43.375, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.4173622704507514, | |
| "grad_norm": 1.130615490119169e-09, | |
| "learning_rate": 8.873664091123909e-05, | |
| "logits/chosen": -4.0546875, | |
| "logits/rejected": -2.4140625, | |
| "logps/chosen": -95.75, | |
| "logps/rejected": -684.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.671875, | |
| "rewards/margins": 50.375, | |
| "rewards/rejected": -44.75, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.4207011686143574, | |
| "grad_norm": 6.090725257479335e-09, | |
| "learning_rate": 8.845588571356745e-05, | |
| "logits/chosen": -4.59375, | |
| "logits/rejected": -2.578125, | |
| "logps/chosen": -89.25, | |
| "logps/rejected": -726.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.953125, | |
| "rewards/margins": 52.25, | |
| "rewards/rejected": -46.125, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.4240400667779634, | |
| "grad_norm": 6.303178088273853e-05, | |
| "learning_rate": 8.817502515525927e-05, | |
| "logits/chosen": -3.921875, | |
| "logits/rejected": -2.53125, | |
| "logps/chosen": -88.5, | |
| "logps/rejected": -716.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.328125, | |
| "rewards/margins": 52.0, | |
| "rewards/rejected": -46.625, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.4273789649415694, | |
| "grad_norm": 1.215575307078609e-09, | |
| "learning_rate": 8.789406273584708e-05, | |
| "logits/chosen": -4.71875, | |
| "logits/rejected": -2.3515625, | |
| "logps/chosen": -67.0, | |
| "logps/rejected": -822.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.34375, | |
| "rewards/margins": 57.25, | |
| "rewards/rejected": -51.875, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.4307178631051753, | |
| "grad_norm": 1.2602808965311851e-05, | |
| "learning_rate": 8.761300195613267e-05, | |
| "logits/chosen": -4.140625, | |
| "logits/rejected": -2.390625, | |
| "logps/chosen": -77.75, | |
| "logps/rejected": -776.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.796875, | |
| "rewards/margins": 56.0, | |
| "rewards/rejected": -50.25, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.4340567612687813, | |
| "grad_norm": 1.7794310469554375e-10, | |
| "learning_rate": 8.733184631814326e-05, | |
| "logits/chosen": -4.671875, | |
| "logits/rejected": -2.6015625, | |
| "logps/chosen": -98.0, | |
| "logps/rejected": -682.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.453125, | |
| "rewards/margins": 51.125, | |
| "rewards/rejected": -45.625, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.4373956594323873, | |
| "grad_norm": 4.9994866913039004e-08, | |
| "learning_rate": 8.705059932508816e-05, | |
| "logits/chosen": -4.3125, | |
| "logits/rejected": -2.359375, | |
| "logps/chosen": -94.5, | |
| "logps/rejected": -694.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.8125, | |
| "rewards/margins": 50.625, | |
| "rewards/rejected": -44.75, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.4407345575959933, | |
| "grad_norm": 1.9163441322689323e-07, | |
| "learning_rate": 8.676926448131487e-05, | |
| "logits/chosen": -4.734375, | |
| "logits/rejected": -2.3984375, | |
| "logps/chosen": -83.75, | |
| "logps/rejected": -736.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.625, | |
| "rewards/margins": 53.75, | |
| "rewards/rejected": -48.125, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.4440734557595993, | |
| "grad_norm": 4.263274178128995e-09, | |
| "learning_rate": 8.648784529226552e-05, | |
| "logits/chosen": -4.375, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -81.0, | |
| "logps/rejected": -728.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.171875, | |
| "rewards/margins": 54.625, | |
| "rewards/rejected": -48.5, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.4474123539232053, | |
| "grad_norm": 0.0010009096004068851, | |
| "learning_rate": 8.620634526443326e-05, | |
| "logits/chosen": -4.671875, | |
| "logits/rejected": -2.578125, | |
| "logps/chosen": -74.75, | |
| "logps/rejected": -668.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.96875, | |
| "rewards/margins": 50.625, | |
| "rewards/rejected": -44.75, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.4507512520868113, | |
| "grad_norm": 1.0308641940071084e-08, | |
| "learning_rate": 8.59247679053184e-05, | |
| "logits/chosen": -4.75, | |
| "logits/rejected": -2.578125, | |
| "logps/chosen": -59.25, | |
| "logps/rejected": -718.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.328125, | |
| "rewards/margins": 53.875, | |
| "rewards/rejected": -48.5, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.4540901502504173, | |
| "grad_norm": 5.3008247959951404e-06, | |
| "learning_rate": 8.564311672338488e-05, | |
| "logits/chosen": -4.0390625, | |
| "logits/rejected": -2.28125, | |
| "logps/chosen": -94.5, | |
| "logps/rejected": -728.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.5, | |
| "rewards/margins": 53.125, | |
| "rewards/rejected": -47.625, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.4574290484140233, | |
| "grad_norm": 9.894330105453264e-06, | |
| "learning_rate": 8.536139522801641e-05, | |
| "logits/chosen": -4.734375, | |
| "logits/rejected": -2.2265625, | |
| "logps/chosen": -63.25, | |
| "logps/rejected": -778.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.625, | |
| "rewards/margins": 55.875, | |
| "rewards/rejected": -50.25, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.4607679465776293, | |
| "grad_norm": 1.3490364381141262e-07, | |
| "learning_rate": 8.507960692947287e-05, | |
| "logits/chosen": -4.0078125, | |
| "logits/rejected": -2.4453125, | |
| "logps/chosen": -133.5, | |
| "logps/rejected": -640.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.6875, | |
| "rewards/margins": 49.125, | |
| "rewards/rejected": -43.375, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.4641068447412353, | |
| "grad_norm": 1.2006904359651571e-09, | |
| "learning_rate": 8.479775533884641e-05, | |
| "logits/chosen": -4.515625, | |
| "logits/rejected": -2.625, | |
| "logps/chosen": -75.5, | |
| "logps/rejected": -702.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.4375, | |
| "rewards/margins": 52.5, | |
| "rewards/rejected": -47.0, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.4674457429048413, | |
| "grad_norm": 2.2769681695677946e-09, | |
| "learning_rate": 8.45158439680179e-05, | |
| "logits/chosen": -4.625, | |
| "logits/rejected": -2.65625, | |
| "logps/chosen": -99.0, | |
| "logps/rejected": -644.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.328125, | |
| "rewards/margins": 50.125, | |
| "rewards/rejected": -43.875, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.4707846410684473, | |
| "grad_norm": 3.575904156116749e-11, | |
| "learning_rate": 8.423387632961305e-05, | |
| "logits/chosen": -4.5, | |
| "logits/rejected": -2.4375, | |
| "logps/chosen": -77.5, | |
| "logps/rejected": -824.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.140625, | |
| "rewards/margins": 60.625, | |
| "rewards/rejected": -54.5, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.4741235392320533, | |
| "grad_norm": 5.85626310112275e-07, | |
| "learning_rate": 8.395185593695866e-05, | |
| "logits/chosen": -4.21875, | |
| "logits/rejected": -2.5859375, | |
| "logps/chosen": -103.25, | |
| "logps/rejected": -632.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.109375, | |
| "rewards/margins": 48.5, | |
| "rewards/rejected": -43.375, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.4774624373956593, | |
| "grad_norm": 3.311103853320674e-07, | |
| "learning_rate": 8.366978630403886e-05, | |
| "logits/chosen": -4.0625, | |
| "logits/rejected": -2.421875, | |
| "logps/chosen": -96.25, | |
| "logps/rejected": -808.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.59375, | |
| "rewards/margins": 57.375, | |
| "rewards/rejected": -51.875, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.4808013355592655, | |
| "grad_norm": 0.003105483716353774, | |
| "learning_rate": 8.338767094545124e-05, | |
| "logits/chosen": -4.453125, | |
| "logits/rejected": -2.328125, | |
| "logps/chosen": -85.25, | |
| "logps/rejected": -762.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.40625, | |
| "rewards/margins": 54.5, | |
| "rewards/rejected": -49.125, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.4841402337228715, | |
| "grad_norm": 7.841013575671241e-05, | |
| "learning_rate": 8.310551337636326e-05, | |
| "logits/chosen": -4.296875, | |
| "logits/rejected": -2.4609375, | |
| "logps/chosen": -107.75, | |
| "logps/rejected": -752.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.203125, | |
| "rewards/margins": 55.0, | |
| "rewards/rejected": -49.75, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.4874791318864775, | |
| "grad_norm": 9.359698793787175e-09, | |
| "learning_rate": 8.282331711246822e-05, | |
| "logits/chosen": -4.21875, | |
| "logits/rejected": -2.359375, | |
| "logps/chosen": -80.0, | |
| "logps/rejected": -728.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.59375, | |
| "rewards/margins": 53.0, | |
| "rewards/rejected": -47.5, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.4908180300500835, | |
| "grad_norm": 1.995251464048664e-10, | |
| "learning_rate": 8.25410856699416e-05, | |
| "logits/chosen": -4.1484375, | |
| "logits/rejected": -2.390625, | |
| "logps/chosen": -98.25, | |
| "logps/rejected": -766.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.03125, | |
| "rewards/margins": 56.25, | |
| "rewards/rejected": -50.25, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.4941569282136895, | |
| "grad_norm": 3.125167458151168e-09, | |
| "learning_rate": 8.225882256539724e-05, | |
| "logits/chosen": -3.859375, | |
| "logits/rejected": -2.5, | |
| "logps/chosen": -117.5, | |
| "logps/rejected": -774.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.1875, | |
| "rewards/margins": 55.625, | |
| "rewards/rejected": -50.375, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.4974958263772955, | |
| "grad_norm": 1.1834481172812161e-09, | |
| "learning_rate": 8.197653131584339e-05, | |
| "logits/chosen": -4.0625, | |
| "logits/rejected": -2.4140625, | |
| "logps/chosen": -114.75, | |
| "logps/rejected": -666.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.234375, | |
| "rewards/margins": 49.625, | |
| "rewards/rejected": -44.375, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.5008347245409015, | |
| "grad_norm": 9.292146913031374e-11, | |
| "learning_rate": 8.169421543863914e-05, | |
| "logits/chosen": -4.40625, | |
| "logits/rejected": -2.5390625, | |
| "logps/chosen": -88.0, | |
| "logps/rejected": -702.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.0625, | |
| "rewards/margins": 53.0, | |
| "rewards/rejected": -46.875, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.5041736227045075, | |
| "grad_norm": 5.541923586704911e-10, | |
| "learning_rate": 8.14118784514503e-05, | |
| "logits/chosen": -4.609375, | |
| "logits/rejected": -2.390625, | |
| "logps/chosen": -100.25, | |
| "logps/rejected": -770.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.5625, | |
| "rewards/margins": 56.375, | |
| "rewards/rejected": -50.75, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.5075125208681135, | |
| "grad_norm": 2.15681126292111e-08, | |
| "learning_rate": 8.112952387220578e-05, | |
| "logits/chosen": -4.15625, | |
| "logits/rejected": -2.3984375, | |
| "logps/chosen": -87.25, | |
| "logps/rejected": -762.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.171875, | |
| "rewards/margins": 57.375, | |
| "rewards/rejected": -51.125, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.5108514190317195, | |
| "grad_norm": 2.62340899004343e-10, | |
| "learning_rate": 8.084715521905363e-05, | |
| "logits/chosen": -4.03125, | |
| "logits/rejected": -2.53125, | |
| "logps/chosen": -100.75, | |
| "logps/rejected": -674.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.703125, | |
| "rewards/margins": 50.875, | |
| "rewards/rejected": -45.125, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.5141903171953257, | |
| "grad_norm": 8.177906130057977e-10, | |
| "learning_rate": 8.056477601031736e-05, | |
| "logits/chosen": -4.40625, | |
| "logits/rejected": -2.65625, | |
| "logps/chosen": -84.25, | |
| "logps/rejected": -710.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.953125, | |
| "rewards/margins": 54.125, | |
| "rewards/rejected": -48.0, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.5175292153589317, | |
| "grad_norm": 1.9370509970334515e-12, | |
| "learning_rate": 8.028238976445194e-05, | |
| "logits/chosen": -4.0546875, | |
| "logits/rejected": -2.390625, | |
| "logps/chosen": -92.5, | |
| "logps/rejected": -704.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.9375, | |
| "rewards/margins": 53.25, | |
| "rewards/rejected": -47.25, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.5208681135225377, | |
| "grad_norm": 1.649963672634147e-11, | |
| "learning_rate": 8e-05, | |
| "logits/chosen": -4.484375, | |
| "logits/rejected": -2.359375, | |
| "logps/chosen": -64.625, | |
| "logps/rejected": -846.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.0, | |
| "rewards/margins": 61.125, | |
| "rewards/rejected": -56.125, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.5242070116861437, | |
| "grad_norm": 3.1389786272484343e-06, | |
| "learning_rate": 7.971761023554807e-05, | |
| "logits/chosen": -4.4375, | |
| "logits/rejected": -2.4375, | |
| "logps/chosen": -73.75, | |
| "logps/rejected": -720.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.234375, | |
| "rewards/margins": 52.0, | |
| "rewards/rejected": -46.75, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.5275459098497497, | |
| "grad_norm": 6.32845509329627e-09, | |
| "learning_rate": 7.943522398968266e-05, | |
| "logits/chosen": -3.9453125, | |
| "logits/rejected": -2.5546875, | |
| "logps/chosen": -94.75, | |
| "logps/rejected": -714.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.6875, | |
| "rewards/margins": 53.5, | |
| "rewards/rejected": -47.75, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.5308848080133557, | |
| "grad_norm": 6.801514462750902e-09, | |
| "learning_rate": 7.915284478094637e-05, | |
| "logits/chosen": -4.359375, | |
| "logits/rejected": -2.578125, | |
| "logps/chosen": -86.5, | |
| "logps/rejected": -726.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.34375, | |
| "rewards/margins": 53.875, | |
| "rewards/rejected": -48.5, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.5342237061769617, | |
| "grad_norm": 3.1257162191877796e-08, | |
| "learning_rate": 7.887047612779426e-05, | |
| "logits/chosen": -3.9140625, | |
| "logits/rejected": -2.3984375, | |
| "logps/chosen": -92.0, | |
| "logps/rejected": -764.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.84375, | |
| "rewards/margins": 56.75, | |
| "rewards/rejected": -50.875, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.5375626043405677, | |
| "grad_norm": 2.6219735271837408e-09, | |
| "learning_rate": 7.858812154854972e-05, | |
| "logits/chosen": -4.265625, | |
| "logits/rejected": -2.5625, | |
| "logps/chosen": -87.5, | |
| "logps/rejected": -730.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.296875, | |
| "rewards/margins": 53.625, | |
| "rewards/rejected": -48.25, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.5409015025041737, | |
| "grad_norm": 1.4608904086443886e-09, | |
| "learning_rate": 7.83057845613609e-05, | |
| "logits/chosen": -4.0625, | |
| "logits/rejected": -2.296875, | |
| "logps/chosen": -89.75, | |
| "logps/rejected": -672.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.859375, | |
| "rewards/margins": 50.75, | |
| "rewards/rejected": -44.875, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.5442404006677797, | |
| "grad_norm": 1.4853892338351216e-08, | |
| "learning_rate": 7.802346868415662e-05, | |
| "logits/chosen": -4.421875, | |
| "logits/rejected": -2.3671875, | |
| "logps/chosen": -91.75, | |
| "logps/rejected": -742.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.578125, | |
| "rewards/margins": 54.125, | |
| "rewards/rejected": -48.5, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.5475792988313857, | |
| "grad_norm": 8.356595344594098e-07, | |
| "learning_rate": 7.774117743460278e-05, | |
| "logits/chosen": -4.0546875, | |
| "logits/rejected": -2.375, | |
| "logps/chosen": -115.25, | |
| "logps/rejected": -800.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.09375, | |
| "rewards/margins": 56.375, | |
| "rewards/rejected": -50.375, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.5509181969949917, | |
| "grad_norm": 3.025586556759663e-05, | |
| "learning_rate": 7.745891433005843e-05, | |
| "logits/chosen": -4.703125, | |
| "logits/rejected": -2.5859375, | |
| "logps/chosen": -81.5, | |
| "logps/rejected": -696.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.421875, | |
| "rewards/margins": 52.25, | |
| "rewards/rejected": -46.875, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.5542570951585977, | |
| "grad_norm": 7.671878243797892e-09, | |
| "learning_rate": 7.717668288753181e-05, | |
| "logits/chosen": -4.1875, | |
| "logits/rejected": -2.390625, | |
| "logps/chosen": -98.5, | |
| "logps/rejected": -730.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.046875, | |
| "rewards/margins": 53.875, | |
| "rewards/rejected": -47.875, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.5575959933222037, | |
| "grad_norm": 1.2859992466474068e-06, | |
| "learning_rate": 7.68944866236368e-05, | |
| "logits/chosen": -3.9921875, | |
| "logits/rejected": -2.4296875, | |
| "logps/chosen": -106.5, | |
| "logps/rejected": -734.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.890625, | |
| "rewards/margins": 53.125, | |
| "rewards/rejected": -48.375, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.5609348914858097, | |
| "grad_norm": 8.435268483708569e-08, | |
| "learning_rate": 7.661232905454879e-05, | |
| "logits/chosen": -4.234375, | |
| "logits/rejected": -2.3984375, | |
| "logps/chosen": -101.5, | |
| "logps/rejected": -796.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.25, | |
| "rewards/margins": 57.625, | |
| "rewards/rejected": -52.25, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.5642737896494157, | |
| "grad_norm": 0.0006341390544548631, | |
| "learning_rate": 7.633021369596119e-05, | |
| "logits/chosen": -4.53125, | |
| "logits/rejected": -2.3984375, | |
| "logps/chosen": -63.625, | |
| "logps/rejected": -780.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.21875, | |
| "rewards/margins": 56.75, | |
| "rewards/rejected": -51.5, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.5676126878130217, | |
| "grad_norm": 2.392030573616921e-09, | |
| "learning_rate": 7.604814406304135e-05, | |
| "logits/chosen": -4.234375, | |
| "logits/rejected": -2.578125, | |
| "logps/chosen": -99.0, | |
| "logps/rejected": -736.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.796875, | |
| "rewards/margins": 53.875, | |
| "rewards/rejected": -48.125, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.5709515859766277, | |
| "grad_norm": 7.502531235159537e-11, | |
| "learning_rate": 7.576612367038695e-05, | |
| "logits/chosen": -4.296875, | |
| "logits/rejected": -2.453125, | |
| "logps/chosen": -95.25, | |
| "logps/rejected": -764.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.640625, | |
| "rewards/margins": 55.0, | |
| "rewards/rejected": -49.25, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.5742904841402336, | |
| "grad_norm": 3.8160608006876373e-10, | |
| "learning_rate": 7.548415603198213e-05, | |
| "logits/chosen": -4.34375, | |
| "logits/rejected": -2.4375, | |
| "logps/chosen": -105.0, | |
| "logps/rejected": -636.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.421875, | |
| "rewards/margins": 50.125, | |
| "rewards/rejected": -43.625, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.5776293823038396, | |
| "grad_norm": 4.221317073671571e-09, | |
| "learning_rate": 7.520224466115363e-05, | |
| "logits/chosen": -4.296875, | |
| "logits/rejected": -2.578125, | |
| "logps/chosen": -90.5, | |
| "logps/rejected": -722.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.390625, | |
| "rewards/margins": 53.75, | |
| "rewards/rejected": -48.25, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.5809682804674456, | |
| "grad_norm": 2.064158427517171e-10, | |
| "learning_rate": 7.492039307052718e-05, | |
| "logits/chosen": -4.453125, | |
| "logits/rejected": -2.3828125, | |
| "logps/chosen": -85.0, | |
| "logps/rejected": -772.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.5625, | |
| "rewards/margins": 56.375, | |
| "rewards/rejected": -50.875, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.5843071786310516, | |
| "grad_norm": 4.211127446751561e-09, | |
| "learning_rate": 7.46386047719836e-05, | |
| "logits/chosen": -4.34375, | |
| "logits/rejected": -2.4609375, | |
| "logps/chosen": -88.25, | |
| "logps/rejected": -668.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.765625, | |
| "rewards/margins": 52.0, | |
| "rewards/rejected": -46.375, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.5876460767946576, | |
| "grad_norm": 2.4332844986929558e-05, | |
| "learning_rate": 7.435688327661512e-05, | |
| "logits/chosen": -4.015625, | |
| "logits/rejected": -2.5, | |
| "logps/chosen": -103.75, | |
| "logps/rejected": -644.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.390625, | |
| "rewards/margins": 48.75, | |
| "rewards/rejected": -43.375, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.5909849749582636, | |
| "grad_norm": 1.4354889499301748e-10, | |
| "learning_rate": 7.407523209468162e-05, | |
| "logits/chosen": -4.1015625, | |
| "logits/rejected": -2.3515625, | |
| "logps/chosen": -98.5, | |
| "logps/rejected": -740.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.234375, | |
| "rewards/margins": 56.375, | |
| "rewards/rejected": -50.125, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.5943238731218696, | |
| "grad_norm": 3.726325359387772e-10, | |
| "learning_rate": 7.379365473556675e-05, | |
| "logits/chosen": -4.203125, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -98.25, | |
| "logps/rejected": -704.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.890625, | |
| "rewards/margins": 52.75, | |
| "rewards/rejected": -46.875, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.5976627712854758, | |
| "grad_norm": 1.74457059909372e-09, | |
| "learning_rate": 7.35121547077345e-05, | |
| "logits/chosen": -4.078125, | |
| "logits/rejected": -2.421875, | |
| "logps/chosen": -89.25, | |
| "logps/rejected": -650.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.953125, | |
| "rewards/margins": 49.125, | |
| "rewards/rejected": -43.125, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.6010016694490818, | |
| "grad_norm": 1.4945060300419755e-08, | |
| "learning_rate": 7.323073551868516e-05, | |
| "logits/chosen": -4.234375, | |
| "logits/rejected": -2.390625, | |
| "logps/chosen": -95.5, | |
| "logps/rejected": -736.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.34375, | |
| "rewards/margins": 56.375, | |
| "rewards/rejected": -51.0, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.6043405676126878, | |
| "grad_norm": 3.741529397416343e-08, | |
| "learning_rate": 7.294940067491189e-05, | |
| "logits/chosen": -4.515625, | |
| "logits/rejected": -2.28125, | |
| "logps/chosen": -73.625, | |
| "logps/rejected": -770.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.203125, | |
| "rewards/margins": 55.875, | |
| "rewards/rejected": -50.625, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.6076794657762938, | |
| "grad_norm": 3.76367270682465e-11, | |
| "learning_rate": 7.266815368185677e-05, | |
| "logits/chosen": -4.640625, | |
| "logits/rejected": -2.46875, | |
| "logps/chosen": -74.5, | |
| "logps/rejected": -730.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.75, | |
| "rewards/margins": 54.25, | |
| "rewards/rejected": -48.5, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.6110183639398998, | |
| "grad_norm": 3.4839501950045815e-08, | |
| "learning_rate": 7.238699804386737e-05, | |
| "logits/chosen": -4.21875, | |
| "logits/rejected": -2.453125, | |
| "logps/chosen": -94.25, | |
| "logps/rejected": -652.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.609375, | |
| "rewards/margins": 49.125, | |
| "rewards/rejected": -43.5, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.6143572621035058, | |
| "grad_norm": 1.3145572408390649e-09, | |
| "learning_rate": 7.210593726415293e-05, | |
| "logits/chosen": -4.546875, | |
| "logits/rejected": -2.6015625, | |
| "logps/chosen": -74.5, | |
| "logps/rejected": -650.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.671875, | |
| "rewards/margins": 49.5, | |
| "rewards/rejected": -43.875, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.6176961602671118, | |
| "grad_norm": 0.010065467096865177, | |
| "learning_rate": 7.182497484474075e-05, | |
| "logits/chosen": -4.3125, | |
| "logits/rejected": -2.484375, | |
| "logps/chosen": -75.5, | |
| "logps/rejected": -772.0, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.234375, | |
| "rewards/margins": 58.125, | |
| "rewards/rejected": -51.875, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.6210350584307178, | |
| "grad_norm": 1.0117897839001522e-11, | |
| "learning_rate": 7.154411428643258e-05, | |
| "logits/chosen": -4.4765625, | |
| "logits/rejected": -2.515625, | |
| "logps/chosen": -90.5, | |
| "logps/rejected": -698.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.875, | |
| "rewards/margins": 53.0, | |
| "rewards/rejected": -47.0, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.6243739565943238, | |
| "grad_norm": 4.363270704743627e-07, | |
| "learning_rate": 7.126335908876092e-05, | |
| "logits/chosen": -4.65625, | |
| "logits/rejected": -2.4609375, | |
| "logps/chosen": -72.625, | |
| "logps/rejected": -710.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.40625, | |
| "rewards/margins": 54.25, | |
| "rewards/rejected": -48.875, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.62771285475793, | |
| "grad_norm": 1.109244749386562e-05, | |
| "learning_rate": 7.098271274994556e-05, | |
| "logits/chosen": -4.375, | |
| "logits/rejected": -2.546875, | |
| "logps/chosen": -91.75, | |
| "logps/rejected": -726.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.421875, | |
| "rewards/margins": 52.375, | |
| "rewards/rejected": -46.875, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.631051752921536, | |
| "grad_norm": 3.7534000907335496e-10, | |
| "learning_rate": 7.070217876684981e-05, | |
| "logits/chosen": -4.15625, | |
| "logits/rejected": -2.4140625, | |
| "logps/chosen": -101.75, | |
| "logps/rejected": -774.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.234375, | |
| "rewards/margins": 55.25, | |
| "rewards/rejected": -50.0, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.634390651085142, | |
| "grad_norm": 1.1033153946016228e-08, | |
| "learning_rate": 7.042176063493708e-05, | |
| "logits/chosen": -4.0234375, | |
| "logits/rejected": -2.5234375, | |
| "logps/chosen": -102.75, | |
| "logps/rejected": -642.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.4375, | |
| "rewards/margins": 48.0, | |
| "rewards/rejected": -42.625, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.637729549248748, | |
| "grad_norm": 4.122978625176188e-10, | |
| "learning_rate": 7.014146184822732e-05, | |
| "logits/chosen": -4.359375, | |
| "logits/rejected": -2.671875, | |
| "logps/chosen": -105.75, | |
| "logps/rejected": -720.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.84375, | |
| "rewards/margins": 54.375, | |
| "rewards/rejected": -48.5, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.641068447412354, | |
| "grad_norm": 1.314667042606743e-05, | |
| "learning_rate": 6.98612858992533e-05, | |
| "logits/chosen": -4.53125, | |
| "logits/rejected": -2.4453125, | |
| "logps/chosen": -80.5, | |
| "logps/rejected": -732.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.921875, | |
| "rewards/margins": 54.25, | |
| "rewards/rejected": -48.25, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.64440734557596, | |
| "grad_norm": 1.1127249122111493e-10, | |
| "learning_rate": 6.958123627901733e-05, | |
| "logits/chosen": -3.9921875, | |
| "logits/rejected": -2.578125, | |
| "logps/chosen": -111.25, | |
| "logps/rejected": -686.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.25, | |
| "rewards/margins": 51.25, | |
| "rewards/rejected": -45.0, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.647746243739566, | |
| "grad_norm": 2.7546880687623343e-07, | |
| "learning_rate": 6.930131647694761e-05, | |
| "logits/chosen": -4.328125, | |
| "logits/rejected": -2.25, | |
| "logps/chosen": -80.0, | |
| "logps/rejected": -714.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.4375, | |
| "rewards/margins": 53.0, | |
| "rewards/rejected": -47.5, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.651085141903172, | |
| "grad_norm": 4.5741360699125266e-10, | |
| "learning_rate": 6.90215299808549e-05, | |
| "logits/chosen": -4.4375, | |
| "logits/rejected": -2.375, | |
| "logps/chosen": -83.25, | |
| "logps/rejected": -704.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.328125, | |
| "rewards/margins": 53.75, | |
| "rewards/rejected": -47.375, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.654424040066778, | |
| "grad_norm": 3.2220714274444617e-07, | |
| "learning_rate": 6.874188027688877e-05, | |
| "logits/chosen": -4.515625, | |
| "logits/rejected": -2.296875, | |
| "logps/chosen": -82.25, | |
| "logps/rejected": -738.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.15625, | |
| "rewards/margins": 53.25, | |
| "rewards/rejected": -48.125, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.657762938230384, | |
| "grad_norm": 7.868575124803101e-09, | |
| "learning_rate": 6.846237084949454e-05, | |
| "logits/chosen": -4.34375, | |
| "logits/rejected": -2.421875, | |
| "logps/chosen": -85.5, | |
| "logps/rejected": -754.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.734375, | |
| "rewards/margins": 56.0, | |
| "rewards/rejected": -50.25, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.66110183639399, | |
| "grad_norm": 4.101905481945778e-10, | |
| "learning_rate": 6.818300518136964e-05, | |
| "logits/chosen": -4.359375, | |
| "logits/rejected": -2.3984375, | |
| "logps/chosen": -91.5, | |
| "logps/rejected": -756.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.421875, | |
| "rewards/margins": 55.375, | |
| "rewards/rejected": -49.0, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.664440734557596, | |
| "grad_norm": 1.494113721633994e-10, | |
| "learning_rate": 6.790378675342013e-05, | |
| "logits/chosen": -4.234375, | |
| "logits/rejected": -2.421875, | |
| "logps/chosen": -85.0, | |
| "logps/rejected": -716.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.921875, | |
| "rewards/margins": 54.0, | |
| "rewards/rejected": -48.125, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.667779632721202, | |
| "grad_norm": 7.279717627317606e-12, | |
| "learning_rate": 6.762471904471765e-05, | |
| "logits/chosen": -4.78125, | |
| "logits/rejected": -2.4375, | |
| "logps/chosen": -70.625, | |
| "logps/rejected": -748.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.890625, | |
| "rewards/margins": 56.75, | |
| "rewards/rejected": -50.75, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.671118530884808, | |
| "grad_norm": 3.969352402322102e-09, | |
| "learning_rate": 6.73458055324557e-05, | |
| "logits/chosen": -4.296875, | |
| "logits/rejected": -2.4140625, | |
| "logps/chosen": -72.25, | |
| "logps/rejected": -828.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.59375, | |
| "rewards/margins": 59.5, | |
| "rewards/rejected": -53.875, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.674457429048414, | |
| "grad_norm": 1.531030875412398e-07, | |
| "learning_rate": 6.706704969190657e-05, | |
| "logits/chosen": -4.359375, | |
| "logits/rejected": -2.59375, | |
| "logps/chosen": -81.5, | |
| "logps/rejected": -666.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.75, | |
| "rewards/margins": 51.25, | |
| "rewards/rejected": -45.625, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.67779632721202, | |
| "grad_norm": 8.170403797969072e-10, | |
| "learning_rate": 6.678845499637793e-05, | |
| "logits/chosen": -4.2890625, | |
| "logits/rejected": -2.3515625, | |
| "logps/chosen": -81.0, | |
| "logps/rejected": -702.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.5625, | |
| "rewards/margins": 52.5, | |
| "rewards/rejected": -47.0, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.681135225375626, | |
| "grad_norm": 2.2174371713812313e-12, | |
| "learning_rate": 6.651002491716963e-05, | |
| "logits/chosen": -4.1953125, | |
| "logits/rejected": -2.390625, | |
| "logps/chosen": -98.25, | |
| "logps/rejected": -716.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.53125, | |
| "rewards/margins": 53.875, | |
| "rewards/rejected": -48.375, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.684474123539232, | |
| "grad_norm": 3.214521704375528e-10, | |
| "learning_rate": 6.623176292353034e-05, | |
| "logits/chosen": -4.46875, | |
| "logits/rejected": -2.4296875, | |
| "logps/chosen": -73.25, | |
| "logps/rejected": -736.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.78125, | |
| "rewards/margins": 54.75, | |
| "rewards/rejected": -49.0, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.687813021702838, | |
| "grad_norm": 1.9167500919792246e-10, | |
| "learning_rate": 6.595367248261435e-05, | |
| "logits/chosen": -4.703125, | |
| "logits/rejected": -2.3515625, | |
| "logps/chosen": -59.5, | |
| "logps/rejected": -838.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.609375, | |
| "rewards/margins": 59.5, | |
| "rewards/rejected": -54.0, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.691151919866444, | |
| "grad_norm": 8.242089677423792e-08, | |
| "learning_rate": 6.567575705943849e-05, | |
| "logits/chosen": -4.28125, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -79.5, | |
| "logps/rejected": -688.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.3125, | |
| "rewards/margins": 51.625, | |
| "rewards/rejected": -46.375, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.69449081803005, | |
| "grad_norm": 1.890362355538855e-08, | |
| "learning_rate": 6.539802011683875e-05, | |
| "logits/chosen": -3.8125, | |
| "logits/rejected": -2.46875, | |
| "logps/chosen": -100.0, | |
| "logps/rejected": -724.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.96875, | |
| "rewards/margins": 52.375, | |
| "rewards/rejected": -47.25, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.697829716193656, | |
| "grad_norm": 1.2278357497397252e-10, | |
| "learning_rate": 6.51204651154274e-05, | |
| "logits/chosen": -4.2109375, | |
| "logits/rejected": -2.4296875, | |
| "logps/chosen": -97.75, | |
| "logps/rejected": -740.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.46875, | |
| "rewards/margins": 53.5, | |
| "rewards/rejected": -48.0, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.701168614357262, | |
| "grad_norm": 9.539391498947225e-09, | |
| "learning_rate": 6.484309551354952e-05, | |
| "logits/chosen": -4.78125, | |
| "logits/rejected": -2.5078125, | |
| "logps/chosen": -75.75, | |
| "logps/rejected": -736.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.75, | |
| "rewards/margins": 55.125, | |
| "rewards/rejected": -49.375, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.704507512520868, | |
| "grad_norm": 7.297229043246034e-09, | |
| "learning_rate": 6.456591476724026e-05, | |
| "logits/chosen": -4.53125, | |
| "logits/rejected": -2.10546875, | |
| "logps/chosen": -81.25, | |
| "logps/rejected": -768.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.03125, | |
| "rewards/margins": 55.25, | |
| "rewards/rejected": -49.25, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.707846410684474, | |
| "grad_norm": 1.4188211716614774e-09, | |
| "learning_rate": 6.428892633018158e-05, | |
| "logits/chosen": -4.59375, | |
| "logits/rejected": -2.4453125, | |
| "logps/chosen": -83.875, | |
| "logps/rejected": -718.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.734375, | |
| "rewards/margins": 53.125, | |
| "rewards/rejected": -47.375, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.7111853088480802, | |
| "grad_norm": 3.5108431717389976e-08, | |
| "learning_rate": 6.401213365365921e-05, | |
| "logits/chosen": -4.15625, | |
| "logits/rejected": -2.421875, | |
| "logps/chosen": -95.75, | |
| "logps/rejected": -766.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.796875, | |
| "rewards/margins": 56.125, | |
| "rewards/rejected": -50.25, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.7145242070116862, | |
| "grad_norm": 6.461980950334123e-11, | |
| "learning_rate": 6.373554018651981e-05, | |
| "logits/chosen": -4.5625, | |
| "logits/rejected": -2.34375, | |
| "logps/chosen": -75.25, | |
| "logps/rejected": -836.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.53125, | |
| "rewards/margins": 60.5, | |
| "rewards/rejected": -55.125, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.7178631051752922, | |
| "grad_norm": 2.938630450088908e-10, | |
| "learning_rate": 6.345914937512772e-05, | |
| "logits/chosen": -4.375, | |
| "logits/rejected": -2.625, | |
| "logps/chosen": -88.75, | |
| "logps/rejected": -720.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.859375, | |
| "rewards/margins": 54.0, | |
| "rewards/rejected": -48.25, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.7212020033388982, | |
| "grad_norm": 7.139490387775282e-11, | |
| "learning_rate": 6.318296466332232e-05, | |
| "logits/chosen": -4.46875, | |
| "logits/rejected": -2.4375, | |
| "logps/chosen": -83.0, | |
| "logps/rejected": -770.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.109375, | |
| "rewards/margins": 56.0, | |
| "rewards/rejected": -49.875, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.7245409015025042, | |
| "grad_norm": 6.043272549050016e-10, | |
| "learning_rate": 6.290698949237494e-05, | |
| "logits/chosen": -3.9921875, | |
| "logits/rejected": -2.265625, | |
| "logps/chosen": -99.5, | |
| "logps/rejected": -764.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.625, | |
| "rewards/margins": 55.375, | |
| "rewards/rejected": -49.75, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.7278797996661102, | |
| "grad_norm": 5.376046829042025e-06, | |
| "learning_rate": 6.2631227300946e-05, | |
| "logits/chosen": -4.5, | |
| "logits/rejected": -2.484375, | |
| "logps/chosen": -75.75, | |
| "logps/rejected": -698.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.1875, | |
| "rewards/margins": 52.75, | |
| "rewards/rejected": -46.625, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.7312186978297162, | |
| "grad_norm": 1.3798023834610262e-10, | |
| "learning_rate": 6.235568152504226e-05, | |
| "logits/chosen": -4.265625, | |
| "logits/rejected": -2.46875, | |
| "logps/chosen": -78.5, | |
| "logps/rejected": -682.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.78125, | |
| "rewards/margins": 52.5, | |
| "rewards/rejected": -46.5, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.7345575959933222, | |
| "grad_norm": 0.0643463060259819, | |
| "learning_rate": 6.20803555979738e-05, | |
| "logits/chosen": -4.578125, | |
| "logits/rejected": -2.4453125, | |
| "logps/chosen": -90.5, | |
| "logps/rejected": -688.0, | |
| "loss": 0.0022, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.046875, | |
| "rewards/margins": 51.625, | |
| "rewards/rejected": -46.5, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.7378964941569284, | |
| "grad_norm": 4.92718896794031e-08, | |
| "learning_rate": 6.18052529503115e-05, | |
| "logits/chosen": -4.359375, | |
| "logits/rejected": -2.421875, | |
| "logps/chosen": -78.0, | |
| "logps/rejected": -712.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.359375, | |
| "rewards/margins": 51.5, | |
| "rewards/rejected": -46.125, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.7412353923205344, | |
| "grad_norm": 1.0779488235357348e-07, | |
| "learning_rate": 6.153037700984412e-05, | |
| "logits/chosen": -4.390625, | |
| "logits/rejected": -2.4453125, | |
| "logps/chosen": -80.125, | |
| "logps/rejected": -744.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.96875, | |
| "rewards/margins": 54.625, | |
| "rewards/rejected": -49.625, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.7445742904841404, | |
| "grad_norm": 2.592769610121337e-10, | |
| "learning_rate": 6.125573120153565e-05, | |
| "logits/chosen": -4.4375, | |
| "logits/rejected": -2.5, | |
| "logps/chosen": -102.5, | |
| "logps/rejected": -734.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.609375, | |
| "rewards/margins": 53.875, | |
| "rewards/rejected": -48.125, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.7479131886477464, | |
| "grad_norm": 1.1691867474183937e-09, | |
| "learning_rate": 6.098131894748267e-05, | |
| "logits/chosen": -4.6875, | |
| "logits/rejected": -2.53125, | |
| "logps/chosen": -80.0, | |
| "logps/rejected": -680.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.65625, | |
| "rewards/margins": 51.375, | |
| "rewards/rejected": -45.75, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.7512520868113524, | |
| "grad_norm": 5.292809746038074e-08, | |
| "learning_rate": 6.070714366687152e-05, | |
| "logits/chosen": -4.34375, | |
| "logits/rejected": -2.4140625, | |
| "logps/chosen": -94.75, | |
| "logps/rejected": -686.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.59375, | |
| "rewards/margins": 51.25, | |
| "rewards/rejected": -45.75, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.7545909849749584, | |
| "grad_norm": 6.529545237832224e-10, | |
| "learning_rate": 6.0433208775936015e-05, | |
| "logits/chosen": -4.71875, | |
| "logits/rejected": -2.4921875, | |
| "logps/chosen": -75.0, | |
| "logps/rejected": -686.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.578125, | |
| "rewards/margins": 52.375, | |
| "rewards/rejected": -46.875, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.7579298831385644, | |
| "grad_norm": 1.5020368664409034e-05, | |
| "learning_rate": 6.015951768791461e-05, | |
| "logits/chosen": -4.25, | |
| "logits/rejected": -2.3359375, | |
| "logps/chosen": -77.0, | |
| "logps/rejected": -730.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.75, | |
| "rewards/margins": 53.75, | |
| "rewards/rejected": -48.0, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.7612687813021703, | |
| "grad_norm": 1.7290001778746955e-05, | |
| "learning_rate": 5.9886073813008015e-05, | |
| "logits/chosen": -4.53125, | |
| "logits/rejected": -2.328125, | |
| "logps/chosen": -73.5, | |
| "logps/rejected": -662.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.09375, | |
| "rewards/margins": 49.625, | |
| "rewards/rejected": -44.375, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.7646076794657763, | |
| "grad_norm": 2.072624738502782e-09, | |
| "learning_rate": 5.961288055833656e-05, | |
| "logits/chosen": -3.734375, | |
| "logits/rejected": -2.5546875, | |
| "logps/chosen": -115.25, | |
| "logps/rejected": -608.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.5625, | |
| "rewards/margins": 47.0, | |
| "rewards/rejected": -41.5, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.7679465776293823, | |
| "grad_norm": 0.013802506029605865, | |
| "learning_rate": 5.9339941327897977e-05, | |
| "logits/chosen": -4.4375, | |
| "logits/rejected": -2.71875, | |
| "logps/chosen": -79.75, | |
| "logps/rejected": -756.0, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.125, | |
| "rewards/margins": 54.5, | |
| "rewards/rejected": -49.375, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.7712854757929883, | |
| "grad_norm": 2.484111405465228e-08, | |
| "learning_rate": 5.906725952252476e-05, | |
| "logits/chosen": -4.515625, | |
| "logits/rejected": -2.4921875, | |
| "logps/chosen": -86.0, | |
| "logps/rejected": -762.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.671875, | |
| "rewards/margins": 55.5, | |
| "rewards/rejected": -49.875, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.7746243739565943, | |
| "grad_norm": 1.7635564120155323e-09, | |
| "learning_rate": 5.879483853984187e-05, | |
| "logits/chosen": -4.09375, | |
| "logits/rejected": -2.4765625, | |
| "logps/chosen": -94.75, | |
| "logps/rejected": -716.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.640625, | |
| "rewards/margins": 53.25, | |
| "rewards/rejected": -47.625, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.7779632721202003, | |
| "grad_norm": 2.7364133181606576e-10, | |
| "learning_rate": 5.852268177422451e-05, | |
| "logits/chosen": -4.1796875, | |
| "logits/rejected": -2.4140625, | |
| "logps/chosen": -93.75, | |
| "logps/rejected": -836.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.296875, | |
| "rewards/margins": 57.875, | |
| "rewards/rejected": -52.625, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.7813021702838063, | |
| "grad_norm": 5.411819259393269e-08, | |
| "learning_rate": 5.8250792616755554e-05, | |
| "logits/chosen": -4.28125, | |
| "logits/rejected": -2.46875, | |
| "logps/chosen": -82.875, | |
| "logps/rejected": -712.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.828125, | |
| "rewards/margins": 52.25, | |
| "rewards/rejected": -46.375, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.7846410684474123, | |
| "grad_norm": 1.0748289758222285e-10, | |
| "learning_rate": 5.7979174455183625e-05, | |
| "logits/chosen": -4.390625, | |
| "logits/rejected": -2.4296875, | |
| "logps/chosen": -98.25, | |
| "logps/rejected": -714.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.515625, | |
| "rewards/margins": 52.5, | |
| "rewards/rejected": -47.125, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.7879799666110183, | |
| "grad_norm": 5.165533001338929e-10, | |
| "learning_rate": 5.7707830673880635e-05, | |
| "logits/chosen": -4.46875, | |
| "logits/rejected": -2.5703125, | |
| "logps/chosen": -71.75, | |
| "logps/rejected": -698.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.8125, | |
| "rewards/margins": 53.625, | |
| "rewards/rejected": -47.75, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.7913188647746243, | |
| "grad_norm": 3.2588772569397406e-08, | |
| "learning_rate": 5.743676465379977e-05, | |
| "logits/chosen": -4.375, | |
| "logits/rejected": -2.4609375, | |
| "logps/chosen": -71.25, | |
| "logps/rejected": -658.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.703125, | |
| "rewards/margins": 49.875, | |
| "rewards/rejected": -44.125, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.7946577629382303, | |
| "grad_norm": 1.4472433917944727e-08, | |
| "learning_rate": 5.71659797724333e-05, | |
| "logits/chosen": -4.0625, | |
| "logits/rejected": -2.5078125, | |
| "logps/chosen": -92.75, | |
| "logps/rejected": -732.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.84375, | |
| "rewards/margins": 52.0, | |
| "rewards/rejected": -46.125, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.7979966611018363, | |
| "grad_norm": 5.101317128719529e-07, | |
| "learning_rate": 5.6895479403770415e-05, | |
| "logits/chosen": -4.4375, | |
| "logits/rejected": -2.5703125, | |
| "logps/chosen": -84.75, | |
| "logps/rejected": -694.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.65625, | |
| "rewards/margins": 52.625, | |
| "rewards/rejected": -47.0, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.8013355592654423, | |
| "grad_norm": 1.3090671302506962e-07, | |
| "learning_rate": 5.6625266918255355e-05, | |
| "logits/chosen": -4.59375, | |
| "logits/rejected": -2.453125, | |
| "logps/chosen": -78.375, | |
| "logps/rejected": -634.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.953125, | |
| "rewards/margins": 48.875, | |
| "rewards/rejected": -42.875, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.8046744574290483, | |
| "grad_norm": 5.188319392168683e-12, | |
| "learning_rate": 5.6355345682745285e-05, | |
| "logits/chosen": -4.5546875, | |
| "logits/rejected": -2.3828125, | |
| "logps/chosen": -84.125, | |
| "logps/rejected": -796.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.375, | |
| "rewards/margins": 57.875, | |
| "rewards/rejected": -52.5, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.8080133555926543, | |
| "grad_norm": 2.993649275140342e-07, | |
| "learning_rate": 5.608571906046841e-05, | |
| "logits/chosen": -4.609375, | |
| "logits/rejected": -2.4453125, | |
| "logps/chosen": -70.75, | |
| "logps/rejected": -704.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.828125, | |
| "rewards/margins": 52.5, | |
| "rewards/rejected": -47.75, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.8113522537562603, | |
| "grad_norm": 3.541090380990153e-12, | |
| "learning_rate": 5.5816390410982e-05, | |
| "logits/chosen": -4.21875, | |
| "logits/rejected": -2.5, | |
| "logps/chosen": -96.0, | |
| "logps/rejected": -802.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.21875, | |
| "rewards/margins": 58.5, | |
| "rewards/rejected": -52.375, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.8146911519198663, | |
| "grad_norm": 2.76508788088492e-10, | |
| "learning_rate": 5.5547363090130596e-05, | |
| "logits/chosen": -4.0234375, | |
| "logits/rejected": -2.4609375, | |
| "logps/chosen": -98.75, | |
| "logps/rejected": -828.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.28125, | |
| "rewards/margins": 58.25, | |
| "rewards/rejected": -53.0, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.8180300500834723, | |
| "grad_norm": 1.0243940096188453e-06, | |
| "learning_rate": 5.5278640450004216e-05, | |
| "logits/chosen": -4.046875, | |
| "logits/rejected": -2.453125, | |
| "logps/chosen": -108.0, | |
| "logps/rejected": -680.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.765625, | |
| "rewards/margins": 50.25, | |
| "rewards/rejected": -44.375, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.8213689482470785, | |
| "grad_norm": 1.3393444575626745e-08, | |
| "learning_rate": 5.501022583889647e-05, | |
| "logits/chosen": -4.171875, | |
| "logits/rejected": -2.3359375, | |
| "logps/chosen": -109.25, | |
| "logps/rejected": -684.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.8125, | |
| "rewards/margins": 50.625, | |
| "rewards/rejected": -44.75, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.8247078464106845, | |
| "grad_norm": 1.8362791820436541e-07, | |
| "learning_rate": 5.474212260126299e-05, | |
| "logits/chosen": -4.46875, | |
| "logits/rejected": -2.484375, | |
| "logps/chosen": -86.5, | |
| "logps/rejected": -684.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.59375, | |
| "rewards/margins": 50.25, | |
| "rewards/rejected": -44.75, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.8280467445742905, | |
| "grad_norm": 3.473892036254256e-07, | |
| "learning_rate": 5.4474334077679604e-05, | |
| "logits/chosen": -4.453125, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -88.0, | |
| "logps/rejected": -816.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.703125, | |
| "rewards/margins": 58.5, | |
| "rewards/rejected": -52.875, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.8313856427378965, | |
| "grad_norm": 1.5503935912875022e-08, | |
| "learning_rate": 5.4206863604800853e-05, | |
| "logits/chosen": -4.484375, | |
| "logits/rejected": -2.375, | |
| "logps/chosen": -89.0, | |
| "logps/rejected": -662.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.0625, | |
| "rewards/margins": 50.125, | |
| "rewards/rejected": -44.0, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.8347245409015025, | |
| "grad_norm": 3.2570995900371713e-10, | |
| "learning_rate": 5.393971451531833e-05, | |
| "logits/chosen": -4.578125, | |
| "logits/rejected": -2.4375, | |
| "logps/chosen": -81.75, | |
| "logps/rejected": -710.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.84375, | |
| "rewards/margins": 53.5, | |
| "rewards/rejected": -47.625, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.8380634390651085, | |
| "grad_norm": 3.935995973591844e-09, | |
| "learning_rate": 5.36728901379192e-05, | |
| "logits/chosen": -4.328125, | |
| "logits/rejected": -2.3515625, | |
| "logps/chosen": -98.5, | |
| "logps/rejected": -694.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.71875, | |
| "rewards/margins": 52.5, | |
| "rewards/rejected": -46.625, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.8414023372287145, | |
| "grad_norm": 1.3496583850525212e-09, | |
| "learning_rate": 5.34063937972447e-05, | |
| "logits/chosen": -4.0390625, | |
| "logits/rejected": -2.3515625, | |
| "logps/chosen": -109.75, | |
| "logps/rejected": -802.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.578125, | |
| "rewards/margins": 58.875, | |
| "rewards/rejected": -53.25, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.8447412353923205, | |
| "grad_norm": 2.9108708776703907e-09, | |
| "learning_rate": 5.3140228813848656e-05, | |
| "logits/chosen": -3.796875, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -103.25, | |
| "logps/rejected": -772.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.859375, | |
| "rewards/margins": 57.0, | |
| "rewards/rejected": -51.125, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.8480801335559267, | |
| "grad_norm": 3.852580630336888e-05, | |
| "learning_rate": 5.287439850415627e-05, | |
| "logits/chosen": -4.421875, | |
| "logits/rejected": -2.4765625, | |
| "logps/chosen": -91.5, | |
| "logps/rejected": -692.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.65625, | |
| "rewards/margins": 50.375, | |
| "rewards/rejected": -44.75, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.8514190317195327, | |
| "grad_norm": 7.022646464349691e-09, | |
| "learning_rate": 5.260890618042261e-05, | |
| "logits/chosen": -4.671875, | |
| "logits/rejected": -2.484375, | |
| "logps/chosen": -78.125, | |
| "logps/rejected": -714.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.71875, | |
| "rewards/margins": 54.0, | |
| "rewards/rejected": -48.125, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.8547579298831387, | |
| "grad_norm": 9.849737345191123e-12, | |
| "learning_rate": 5.234375515069149e-05, | |
| "logits/chosen": -4.25, | |
| "logits/rejected": -2.4140625, | |
| "logps/chosen": -76.0, | |
| "logps/rejected": -690.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.734375, | |
| "rewards/margins": 52.625, | |
| "rewards/rejected": -46.75, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.8580968280467447, | |
| "grad_norm": 7.22921613487415e-05, | |
| "learning_rate": 5.207894871875419e-05, | |
| "logits/chosen": -4.328125, | |
| "logits/rejected": -2.4921875, | |
| "logps/chosen": -90.5, | |
| "logps/rejected": -716.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.765625, | |
| "rewards/margins": 52.5, | |
| "rewards/rejected": -46.75, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.8614357262103507, | |
| "grad_norm": 5.320074936143726e-10, | |
| "learning_rate": 5.1814490184108204e-05, | |
| "logits/chosen": -3.96875, | |
| "logits/rejected": -2.3828125, | |
| "logps/chosen": -94.25, | |
| "logps/rejected": -784.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.734375, | |
| "rewards/margins": 57.0, | |
| "rewards/rejected": -52.375, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.8647746243739567, | |
| "grad_norm": 1.9208787449542797e-08, | |
| "learning_rate": 5.155038284191632e-05, | |
| "logits/chosen": -4.671875, | |
| "logits/rejected": -2.4765625, | |
| "logps/chosen": -84.0, | |
| "logps/rejected": -688.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.015625, | |
| "rewards/margins": 52.75, | |
| "rewards/rejected": -46.75, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.8681135225375627, | |
| "grad_norm": 1.4424077487973364e-09, | |
| "learning_rate": 5.1286629982965375e-05, | |
| "logits/chosen": -4.6875, | |
| "logits/rejected": -2.4140625, | |
| "logps/chosen": -95.5, | |
| "logps/rejected": -722.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.421875, | |
| "rewards/margins": 52.625, | |
| "rewards/rejected": -47.375, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.8714524207011687, | |
| "grad_norm": 2.7382171530199173e-10, | |
| "learning_rate": 5.102323489362542e-05, | |
| "logits/chosen": -4.484375, | |
| "logits/rejected": -2.484375, | |
| "logps/chosen": -78.5, | |
| "logps/rejected": -700.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.328125, | |
| "rewards/margins": 52.375, | |
| "rewards/rejected": -47.125, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.8747913188647747, | |
| "grad_norm": 3.5231098594046273e-10, | |
| "learning_rate": 5.076020085580856e-05, | |
| "logits/chosen": -4.625, | |
| "logits/rejected": -2.2734375, | |
| "logps/chosen": -75.25, | |
| "logps/rejected": -728.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.03125, | |
| "rewards/margins": 53.5, | |
| "rewards/rejected": -47.375, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.8781302170283807, | |
| "grad_norm": 3.248313618087195e-08, | |
| "learning_rate": 5.049753114692829e-05, | |
| "logits/chosen": -4.46875, | |
| "logits/rejected": -2.2890625, | |
| "logps/chosen": -75.75, | |
| "logps/rejected": -694.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.578125, | |
| "rewards/margins": 52.125, | |
| "rewards/rejected": -46.5, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.8814691151919867, | |
| "grad_norm": 4.1881906831520155e-09, | |
| "learning_rate": 5.023522903985853e-05, | |
| "logits/chosen": -4.203125, | |
| "logits/rejected": -2.484375, | |
| "logps/chosen": -80.75, | |
| "logps/rejected": -768.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.421875, | |
| "rewards/margins": 55.375, | |
| "rewards/rejected": -49.875, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.8848080133555927, | |
| "grad_norm": 1.2319371300861803e-08, | |
| "learning_rate": 4.9973297802892824e-05, | |
| "logits/chosen": -4.625, | |
| "logits/rejected": -2.4609375, | |
| "logps/chosen": -77.25, | |
| "logps/rejected": -694.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.328125, | |
| "rewards/margins": 51.5, | |
| "rewards/rejected": -46.25, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.8881469115191987, | |
| "grad_norm": 1.6527255258802498e-11, | |
| "learning_rate": 4.971174069970375e-05, | |
| "logits/chosen": -4.453125, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -76.25, | |
| "logps/rejected": -786.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.25, | |
| "rewards/margins": 55.5, | |
| "rewards/rejected": -50.25, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.8914858096828047, | |
| "grad_norm": 1.6659939205965202e-08, | |
| "learning_rate": 4.945056098930204e-05, | |
| "logits/chosen": -4.40625, | |
| "logits/rejected": -2.375, | |
| "logps/chosen": -86.25, | |
| "logps/rejected": -710.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.90625, | |
| "rewards/margins": 52.0, | |
| "rewards/rejected": -47.125, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.8948247078464107, | |
| "grad_norm": 4.976216327889915e-09, | |
| "learning_rate": 4.9189761925996226e-05, | |
| "logits/chosen": -3.7890625, | |
| "logits/rejected": -2.4453125, | |
| "logps/chosen": -102.25, | |
| "logps/rejected": -716.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.1875, | |
| "rewards/margins": 52.75, | |
| "rewards/rejected": -47.625, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.8981636060100167, | |
| "grad_norm": 5.617463716411919e-10, | |
| "learning_rate": 4.8929346759351856e-05, | |
| "logits/chosen": -4.453125, | |
| "logits/rejected": -2.59375, | |
| "logps/chosen": -72.75, | |
| "logps/rejected": -670.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.875, | |
| "rewards/margins": 51.25, | |
| "rewards/rejected": -45.375, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.9015025041736227, | |
| "grad_norm": 8.406910168012871e-10, | |
| "learning_rate": 4.8669318734151205e-05, | |
| "logits/chosen": -4.203125, | |
| "logits/rejected": -2.5859375, | |
| "logps/chosen": -106.25, | |
| "logps/rejected": -636.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.984375, | |
| "rewards/margins": 49.125, | |
| "rewards/rejected": -43.0, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.9048414023372287, | |
| "grad_norm": 1.8631781983913243e-07, | |
| "learning_rate": 4.840968109035271e-05, | |
| "logits/chosen": -4.59375, | |
| "logits/rejected": -2.53125, | |
| "logps/chosen": -64.625, | |
| "logps/rejected": -740.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.984375, | |
| "rewards/margins": 54.375, | |
| "rewards/rejected": -49.5, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.9081803005008346, | |
| "grad_norm": 2.908484009189749e-10, | |
| "learning_rate": 4.8150437063050605e-05, | |
| "logits/chosen": -4.359375, | |
| "logits/rejected": -2.6640625, | |
| "logps/chosen": -81.5, | |
| "logps/rejected": -746.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.25, | |
| "rewards/margins": 55.875, | |
| "rewards/rejected": -50.625, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.9115191986644406, | |
| "grad_norm": 1.1384876188458293e-06, | |
| "learning_rate": 4.7891589882434714e-05, | |
| "logits/chosen": -4.3125, | |
| "logits/rejected": -2.4765625, | |
| "logps/chosen": -72.625, | |
| "logps/rejected": -748.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.78125, | |
| "rewards/margins": 55.875, | |
| "rewards/rejected": -51.0, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.9148580968280466, | |
| "grad_norm": 1.7041387398442076e-11, | |
| "learning_rate": 4.763314277375008e-05, | |
| "logits/chosen": -3.7578125, | |
| "logits/rejected": -2.4609375, | |
| "logps/chosen": -98.25, | |
| "logps/rejected": -724.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.21875, | |
| "rewards/margins": 54.0, | |
| "rewards/rejected": -47.75, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.9181969949916526, | |
| "grad_norm": 0.0002684830396901816, | |
| "learning_rate": 4.7375098957256905e-05, | |
| "logits/chosen": -3.9609375, | |
| "logits/rejected": -2.265625, | |
| "logps/chosen": -120.0, | |
| "logps/rejected": -628.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.53125, | |
| "rewards/margins": 46.625, | |
| "rewards/rejected": -41.125, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.9215358931552586, | |
| "grad_norm": 1.264372144760273e-07, | |
| "learning_rate": 4.711746164819026e-05, | |
| "logits/chosen": -4.0234375, | |
| "logits/rejected": -2.34375, | |
| "logps/chosen": -105.0, | |
| "logps/rejected": -732.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.828125, | |
| "rewards/margins": 53.25, | |
| "rewards/rejected": -47.375, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.9248747913188646, | |
| "grad_norm": 0.00022175066987983882, | |
| "learning_rate": 4.6860234056720215e-05, | |
| "logits/chosen": -4.4296875, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -81.0, | |
| "logps/rejected": -684.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.125, | |
| "rewards/margins": 51.0, | |
| "rewards/rejected": -45.0, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.9282136894824706, | |
| "grad_norm": 1.208756259529764e-07, | |
| "learning_rate": 4.6603419387911695e-05, | |
| "logits/chosen": -4.3125, | |
| "logits/rejected": -2.4921875, | |
| "logps/chosen": -77.75, | |
| "logps/rejected": -732.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.390625, | |
| "rewards/margins": 53.375, | |
| "rewards/rejected": -47.875, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.9315525876460768, | |
| "grad_norm": 5.137899550256009e-10, | |
| "learning_rate": 4.63470208416846e-05, | |
| "logits/chosen": -4.359375, | |
| "logits/rejected": -2.3984375, | |
| "logps/chosen": -86.0, | |
| "logps/rejected": -726.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.9375, | |
| "rewards/margins": 54.25, | |
| "rewards/rejected": -48.375, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.9348914858096828, | |
| "grad_norm": 8.149926067346769e-09, | |
| "learning_rate": 4.609104161277392e-05, | |
| "logits/chosen": -4.015625, | |
| "logits/rejected": -2.484375, | |
| "logps/chosen": -103.75, | |
| "logps/rejected": -732.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.671875, | |
| "rewards/margins": 54.0, | |
| "rewards/rejected": -48.375, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.9382303839732888, | |
| "grad_norm": 1.3904052131863409e-08, | |
| "learning_rate": 4.5835484890689914e-05, | |
| "logits/chosen": -4.046875, | |
| "logits/rejected": -2.3125, | |
| "logps/chosen": -115.0, | |
| "logps/rejected": -730.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.203125, | |
| "rewards/margins": 53.75, | |
| "rewards/rejected": -47.5, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.9415692821368948, | |
| "grad_norm": 8.70739924607733e-08, | |
| "learning_rate": 4.558035385967853e-05, | |
| "logits/chosen": -4.5, | |
| "logits/rejected": -2.4609375, | |
| "logps/chosen": -84.75, | |
| "logps/rejected": -720.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.390625, | |
| "rewards/margins": 53.625, | |
| "rewards/rejected": -48.25, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.9449081803005008, | |
| "grad_norm": 0.8842443227767944, | |
| "learning_rate": 4.532565169868134e-05, | |
| "logits/chosen": -4.375, | |
| "logits/rejected": -2.1484375, | |
| "logps/chosen": -95.25, | |
| "logps/rejected": -716.0, | |
| "loss": 0.2373, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": 5.78125, | |
| "rewards/margins": 50.625, | |
| "rewards/rejected": -44.875, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.9482470784641068, | |
| "grad_norm": 9.874440820567543e-09, | |
| "learning_rate": 4.507138158129642e-05, | |
| "logits/chosen": -4.421875, | |
| "logits/rejected": -2.3125, | |
| "logps/chosen": -84.0, | |
| "logps/rejected": -822.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.75, | |
| "rewards/margins": 58.5, | |
| "rewards/rejected": -52.75, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.9515859766277128, | |
| "grad_norm": 1.259659737629093e-11, | |
| "learning_rate": 4.481754667573846e-05, | |
| "logits/chosen": -4.5625, | |
| "logits/rejected": -2.4921875, | |
| "logps/chosen": -80.75, | |
| "logps/rejected": -732.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.0, | |
| "rewards/margins": 54.375, | |
| "rewards/rejected": -48.25, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.9549248747913188, | |
| "grad_norm": 7.668348178668793e-10, | |
| "learning_rate": 4.4564150144799346e-05, | |
| "logits/chosen": -4.140625, | |
| "logits/rejected": -2.3984375, | |
| "logps/chosen": -93.75, | |
| "logps/rejected": -740.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.8125, | |
| "rewards/margins": 55.25, | |
| "rewards/rejected": -49.375, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.9582637729549248, | |
| "grad_norm": 1.0073026857071454e-08, | |
| "learning_rate": 4.431119514580897e-05, | |
| "logits/chosen": -4.546875, | |
| "logits/rejected": -2.6328125, | |
| "logps/chosen": -80.0, | |
| "logps/rejected": -708.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.671875, | |
| "rewards/margins": 53.125, | |
| "rewards/rejected": -47.375, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.961602671118531, | |
| "grad_norm": 4.297049116530616e-08, | |
| "learning_rate": 4.405868483059548e-05, | |
| "logits/chosen": -3.6171875, | |
| "logits/rejected": -2.4296875, | |
| "logps/chosen": -112.5, | |
| "logps/rejected": -678.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.875, | |
| "rewards/margins": 50.125, | |
| "rewards/rejected": -45.25, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.964941569282137, | |
| "grad_norm": 1.041563432724324e-10, | |
| "learning_rate": 4.3806622345446465e-05, | |
| "logits/chosen": -4.109375, | |
| "logits/rejected": -2.453125, | |
| "logps/chosen": -109.0, | |
| "logps/rejected": -664.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.09375, | |
| "rewards/margins": 49.875, | |
| "rewards/rejected": -43.75, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.968280467445743, | |
| "grad_norm": 3.955933670918288e-12, | |
| "learning_rate": 4.3555010831069425e-05, | |
| "logits/chosen": -4.578125, | |
| "logits/rejected": -2.578125, | |
| "logps/chosen": -89.0, | |
| "logps/rejected": -658.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.984375, | |
| "rewards/margins": 51.75, | |
| "rewards/rejected": -45.875, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.971619365609349, | |
| "grad_norm": 2.0798443074454553e-06, | |
| "learning_rate": 4.330385342255275e-05, | |
| "logits/chosen": -3.9921875, | |
| "logits/rejected": -2.3515625, | |
| "logps/chosen": -97.0, | |
| "logps/rejected": -726.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.265625, | |
| "rewards/margins": 51.875, | |
| "rewards/rejected": -47.625, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.974958263772955, | |
| "grad_norm": 1.0061744770695213e-09, | |
| "learning_rate": 4.305315324932675e-05, | |
| "logits/chosen": -4.46875, | |
| "logits/rejected": -2.4375, | |
| "logps/chosen": -108.25, | |
| "logps/rejected": -688.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.15625, | |
| "rewards/margins": 51.125, | |
| "rewards/rejected": -45.875, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.978297161936561, | |
| "grad_norm": 1.4001835246801875e-09, | |
| "learning_rate": 4.280291343512439e-05, | |
| "logits/chosen": -4.203125, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -94.75, | |
| "logps/rejected": -726.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.296875, | |
| "rewards/margins": 53.875, | |
| "rewards/rejected": -48.5, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.981636060100167, | |
| "grad_norm": 7.307075083895498e-11, | |
| "learning_rate": 4.255313709794271e-05, | |
| "logits/chosen": -4.28125, | |
| "logits/rejected": -2.65625, | |
| "logps/chosen": -98.0, | |
| "logps/rejected": -684.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.4375, | |
| "rewards/margins": 51.5, | |
| "rewards/rejected": -46.125, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.984974958263773, | |
| "grad_norm": 2.983379931986718e-10, | |
| "learning_rate": 4.230382735000376e-05, | |
| "logits/chosen": -4.40625, | |
| "logits/rejected": -2.5, | |
| "logps/chosen": -79.25, | |
| "logps/rejected": -700.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.796875, | |
| "rewards/margins": 52.375, | |
| "rewards/rejected": -46.5, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.988313856427379, | |
| "grad_norm": 7.869925156001045e-09, | |
| "learning_rate": 4.2054987297715805e-05, | |
| "logits/chosen": -4.28125, | |
| "logits/rejected": -2.20703125, | |
| "logps/chosen": -92.0, | |
| "logps/rejected": -730.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.515625, | |
| "rewards/margins": 52.75, | |
| "rewards/rejected": -47.25, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.991652754590985, | |
| "grad_norm": 5.2440864983793745e-09, | |
| "learning_rate": 4.180662004163484e-05, | |
| "logits/chosen": -4.765625, | |
| "logits/rejected": -2.578125, | |
| "logps/chosen": -77.25, | |
| "logps/rejected": -690.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.65625, | |
| "rewards/margins": 52.5, | |
| "rewards/rejected": -46.875, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.994991652754591, | |
| "grad_norm": 2.159956588587697e-11, | |
| "learning_rate": 4.1558728676425566e-05, | |
| "logits/chosen": -4.609375, | |
| "logits/rejected": -2.578125, | |
| "logps/chosen": -72.25, | |
| "logps/rejected": -674.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.46875, | |
| "rewards/margins": 50.875, | |
| "rewards/rejected": -45.25, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.998330550918197, | |
| "grad_norm": 9.040629911361009e-10, | |
| "learning_rate": 4.131131629082335e-05, | |
| "logits/chosen": -4.3203125, | |
| "logits/rejected": -2.5703125, | |
| "logps/chosen": -101.5, | |
| "logps/rejected": -704.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.421875, | |
| "rewards/margins": 52.625, | |
| "rewards/rejected": -47.25, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 6.309617674560286e-06, | |
| "learning_rate": 4.106438596759518e-05, | |
| "logits/chosen": -3.515625, | |
| "logits/rejected": -2.25, | |
| "logps/chosen": -97.5, | |
| "logps/rejected": -792.0, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.9375, | |
| "rewards/margins": 60.25, | |
| "rewards/rejected": -55.25, | |
| "step": 600 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 900, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |