| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 100, |
| "global_step": 468, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002136752136752137, |
| "grad_norm": 32.17777061214746, |
| "learning_rate": 1.4893617021276595e-08, |
| "logits/chosen": -0.279296875, |
| "logits/rejected": -0.35546875, |
| "logps/chosen": -0.99609375, |
| "logps/rejected": -0.890625, |
| "loss": 1.0, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.004273504273504274, |
| "grad_norm": 35.04133277530198, |
| "learning_rate": 2.978723404255319e-08, |
| "logits/chosen": -0.41796875, |
| "logits/rejected": -0.283203125, |
| "logps/chosen": -1.6796875, |
| "logps/rejected": -1.9453125, |
| "loss": 1.0, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00641025641025641, |
| "grad_norm": 45.030473466856385, |
| "learning_rate": 4.468085106382978e-08, |
| "logits/chosen": -0.419921875, |
| "logits/rejected": -0.41015625, |
| "logps/chosen": -0.86328125, |
| "logps/rejected": -0.8203125, |
| "loss": 0.9984, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.00042724609375, |
| "rewards/margins": -0.0001220703125, |
| "rewards/rejected": -0.00030517578125, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.008547008547008548, |
| "grad_norm": 20.44290602161885, |
| "learning_rate": 5.957446808510638e-08, |
| "logits/chosen": -0.208984375, |
| "logits/rejected": -0.23046875, |
| "logps/chosen": -2.0625, |
| "logps/rejected": -1.7109375, |
| "loss": 0.9998, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.00323486328125, |
| "rewards/margins": -0.00140380859375, |
| "rewards/rejected": -0.0018310546875, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.010683760683760684, |
| "grad_norm": 25.98006866123092, |
| "learning_rate": 7.446808510638298e-08, |
| "logits/chosen": -0.439453125, |
| "logits/rejected": -0.421875, |
| "logps/chosen": -0.7109375, |
| "logps/rejected": -0.6796875, |
| "loss": 0.9991, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": 3.0517578125e-05, |
| "rewards/margins": -0.000213623046875, |
| "rewards/rejected": 0.000244140625, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01282051282051282, |
| "grad_norm": 26.860512892988023, |
| "learning_rate": 8.936170212765956e-08, |
| "logits/chosen": -0.0947265625, |
| "logits/rejected": -0.177734375, |
| "logps/chosen": -0.9140625, |
| "logps/rejected": -0.625, |
| "loss": 0.9963, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.00128173828125, |
| "rewards/margins": -0.0008544921875, |
| "rewards/rejected": -0.00042724609375, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.014957264957264958, |
| "grad_norm": 32.66934693408433, |
| "learning_rate": 1.0425531914893615e-07, |
| "logits/chosen": -0.61328125, |
| "logits/rejected": -0.4765625, |
| "logps/chosen": -2.046875, |
| "logps/rejected": -2.734375, |
| "loss": 0.9972, |
| "rewards/accuracies": 0.0625, |
| "rewards/chosen": -0.00439453125, |
| "rewards/margins": -0.00103759765625, |
| "rewards/rejected": -0.00335693359375, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.017094017094017096, |
| "grad_norm": 80.33188302131406, |
| "learning_rate": 1.1914893617021276e-07, |
| "logits/chosen": -0.5390625, |
| "logits/rejected": -0.47265625, |
| "logps/chosen": -1.078125, |
| "logps/rejected": -0.6484375, |
| "loss": 1.0045, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": 0.00201416015625, |
| "rewards/margins": 0.00262451171875, |
| "rewards/rejected": -0.0006103515625, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.019230769230769232, |
| "grad_norm": 31.740360264135372, |
| "learning_rate": 1.3404255319148934e-07, |
| "logits/chosen": -0.5078125, |
| "logits/rejected": -0.515625, |
| "logps/chosen": -1.0859375, |
| "logps/rejected": -1.2109375, |
| "loss": 0.9946, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.00146484375, |
| "rewards/margins": -0.0001220703125, |
| "rewards/rejected": -0.0013427734375, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.021367521367521368, |
| "grad_norm": 33.44232905394117, |
| "learning_rate": 1.4893617021276595e-07, |
| "logits/chosen": -0.458984375, |
| "logits/rejected": -0.390625, |
| "logps/chosen": -0.59375, |
| "logps/rejected": -0.5703125, |
| "loss": 0.9956, |
| "rewards/accuracies": 0.125, |
| "rewards/chosen": -0.000762939453125, |
| "rewards/margins": -0.00079345703125, |
| "rewards/rejected": 3.0517578125e-05, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.023504273504273504, |
| "grad_norm": 29.571097837737284, |
| "learning_rate": 1.6382978723404256e-07, |
| "logits/chosen": -0.310546875, |
| "logits/rejected": -0.341796875, |
| "logps/chosen": -0.8046875, |
| "logps/rejected": -0.89453125, |
| "loss": 1.0005, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.00048828125, |
| "rewards/margins": 0.0009765625, |
| "rewards/rejected": -0.00146484375, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02564102564102564, |
| "grad_norm": 36.03559141159498, |
| "learning_rate": 1.7872340425531912e-07, |
| "logits/chosen": -0.39453125, |
| "logits/rejected": -0.380859375, |
| "logps/chosen": -0.88671875, |
| "logps/rejected": -1.484375, |
| "loss": 0.9994, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": 0.00079345703125, |
| "rewards/margins": 0.00079345703125, |
| "rewards/rejected": 0.0, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.027777777777777776, |
| "grad_norm": 24.278252772621258, |
| "learning_rate": 1.9361702127659575e-07, |
| "logits/chosen": -0.49609375, |
| "logits/rejected": -0.40234375, |
| "logps/chosen": -0.58203125, |
| "logps/rejected": -0.4296875, |
| "loss": 0.9996, |
| "rewards/accuracies": 0.1875, |
| "rewards/chosen": -0.00048828125, |
| "rewards/margins": 3.0517578125e-05, |
| "rewards/rejected": -0.000518798828125, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.029914529914529916, |
| "grad_norm": 53.95191319044397, |
| "learning_rate": 2.085106382978723e-07, |
| "logits/chosen": -0.6171875, |
| "logits/rejected": -0.6015625, |
| "logps/chosen": -0.60546875, |
| "logps/rejected": -0.55078125, |
| "loss": 1.0034, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.000732421875, |
| "rewards/margins": -0.00054931640625, |
| "rewards/rejected": -0.00018310546875, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03205128205128205, |
| "grad_norm": 28.363357101937858, |
| "learning_rate": 2.2340425531914894e-07, |
| "logits/chosen": -0.369140625, |
| "logits/rejected": -0.380859375, |
| "logps/chosen": -0.8125, |
| "logps/rejected": -1.6875, |
| "loss": 1.0035, |
| "rewards/accuracies": 0.1875, |
| "rewards/chosen": -0.002044677734375, |
| "rewards/margins": 0.002166748046875, |
| "rewards/rejected": -0.00421142578125, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03418803418803419, |
| "grad_norm": 60.351935351269326, |
| "learning_rate": 2.3829787234042553e-07, |
| "logits/chosen": -0.48046875, |
| "logits/rejected": -0.4453125, |
| "logps/chosen": -0.578125, |
| "logps/rejected": -0.6640625, |
| "loss": 0.998, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -6.103515625e-05, |
| "rewards/margins": 0.001434326171875, |
| "rewards/rejected": -0.001495361328125, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03632478632478633, |
| "grad_norm": 73.3125350448486, |
| "learning_rate": 2.5319148936170213e-07, |
| "logits/chosen": -0.61328125, |
| "logits/rejected": -0.55078125, |
| "logps/chosen": -0.640625, |
| "logps/rejected": -0.67578125, |
| "loss": 0.9965, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.00067138671875, |
| "rewards/margins": 0.0013427734375, |
| "rewards/rejected": -0.00201416015625, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.038461538461538464, |
| "grad_norm": 39.12184643853537, |
| "learning_rate": 2.680851063829787e-07, |
| "logits/chosen": -0.271484375, |
| "logits/rejected": -0.294921875, |
| "logps/chosen": -1.34375, |
| "logps/rejected": -1.296875, |
| "loss": 1.002, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.010009765625, |
| "rewards/margins": 0.0025634765625, |
| "rewards/rejected": 0.0074462890625, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0405982905982906, |
| "grad_norm": 37.45050759373028, |
| "learning_rate": 2.829787234042553e-07, |
| "logits/chosen": -0.6640625, |
| "logits/rejected": -0.5234375, |
| "logps/chosen": -0.82421875, |
| "logps/rejected": -0.69921875, |
| "loss": 0.996, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.00079345703125, |
| "rewards/margins": 0.00042724609375, |
| "rewards/rejected": -0.001220703125, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.042735042735042736, |
| "grad_norm": 29.741015994855005, |
| "learning_rate": 2.978723404255319e-07, |
| "logits/chosen": -0.484375, |
| "logits/rejected": -0.38671875, |
| "logps/chosen": -1.21875, |
| "logps/rejected": -0.734375, |
| "loss": 1.0015, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": 0.002685546875, |
| "rewards/margins": 0.00238037109375, |
| "rewards/rejected": 0.00030517578125, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04487179487179487, |
| "grad_norm": 35.69376933733865, |
| "learning_rate": 3.1276595744680846e-07, |
| "logits/chosen": -0.451171875, |
| "logits/rejected": -0.431640625, |
| "logps/chosen": -1.3828125, |
| "logps/rejected": -1.328125, |
| "loss": 0.9961, |
| "rewards/accuracies": 0.1875, |
| "rewards/chosen": 0.00091552734375, |
| "rewards/margins": 0.00225830078125, |
| "rewards/rejected": -0.0013427734375, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.04700854700854701, |
| "grad_norm": 42.08445297151301, |
| "learning_rate": 3.276595744680851e-07, |
| "logits/chosen": -0.5859375, |
| "logits/rejected": -0.51953125, |
| "logps/chosen": -1.0625, |
| "logps/rejected": -1.5546875, |
| "loss": 0.9996, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.00054931640625, |
| "rewards/margins": -0.0015869140625, |
| "rewards/rejected": 0.00103759765625, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.049145299145299144, |
| "grad_norm": 53.496163216600635, |
| "learning_rate": 3.425531914893617e-07, |
| "logits/chosen": -0.408203125, |
| "logits/rejected": -0.26953125, |
| "logps/chosen": -0.796875, |
| "logps/rejected": -0.6953125, |
| "loss": 1.0005, |
| "rewards/accuracies": 0.1875, |
| "rewards/chosen": -0.00347900390625, |
| "rewards/margins": -0.00250244140625, |
| "rewards/rejected": -0.0009765625, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.05128205128205128, |
| "grad_norm": 63.90357017444423, |
| "learning_rate": 3.5744680851063824e-07, |
| "logits/chosen": -0.376953125, |
| "logits/rejected": -0.380859375, |
| "logps/chosen": -0.59375, |
| "logps/rejected": -0.578125, |
| "loss": 0.9982, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.0001220703125, |
| "rewards/margins": 0.0008544921875, |
| "rewards/rejected": -0.000732421875, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.053418803418803416, |
| "grad_norm": 17.141245281554458, |
| "learning_rate": 3.7234042553191484e-07, |
| "logits/chosen": -0.5, |
| "logits/rejected": -0.390625, |
| "logps/chosen": -0.67578125, |
| "logps/rejected": -0.796875, |
| "loss": 0.9971, |
| "rewards/accuracies": 0.1875, |
| "rewards/chosen": -0.00244140625, |
| "rewards/margins": -0.00164794921875, |
| "rewards/rejected": -0.00079345703125, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.05555555555555555, |
| "grad_norm": 41.45668288946838, |
| "learning_rate": 3.872340425531915e-07, |
| "logits/chosen": -0.1650390625, |
| "logits/rejected": -0.1806640625, |
| "logps/chosen": -0.58984375, |
| "logps/rejected": -0.6171875, |
| "loss": 0.9935, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": 0.000335693359375, |
| "rewards/margins": 0.0003509521484375, |
| "rewards/rejected": -1.52587890625e-05, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.057692307692307696, |
| "grad_norm": 36.88109178289957, |
| "learning_rate": 4.021276595744681e-07, |
| "logits/chosen": -0.44921875, |
| "logits/rejected": -0.373046875, |
| "logps/chosen": -1.9140625, |
| "logps/rejected": -2.34375, |
| "loss": 0.9972, |
| "rewards/accuracies": 0.1875, |
| "rewards/chosen": -0.001953125, |
| "rewards/margins": -0.0030517578125, |
| "rewards/rejected": 0.0010986328125, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.05982905982905983, |
| "grad_norm": 22.545905854772307, |
| "learning_rate": 4.170212765957446e-07, |
| "logits/chosen": -0.59375, |
| "logits/rejected": -0.578125, |
| "logps/chosen": -0.85546875, |
| "logps/rejected": -0.8671875, |
| "loss": 0.9972, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.00213623046875, |
| "rewards/margins": -0.00115966796875, |
| "rewards/rejected": -0.0009765625, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.06196581196581197, |
| "grad_norm": 43.896588688689796, |
| "learning_rate": 4.319148936170213e-07, |
| "logits/chosen": -0.37890625, |
| "logits/rejected": -0.3515625, |
| "logps/chosen": -0.671875, |
| "logps/rejected": -0.6171875, |
| "loss": 1.0031, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": 0.0029296875, |
| "rewards/margins": 0.0040283203125, |
| "rewards/rejected": -0.0010986328125, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0641025641025641, |
| "grad_norm": 40.25049255062445, |
| "learning_rate": 4.468085106382979e-07, |
| "logits/chosen": -0.7421875, |
| "logits/rejected": -0.62109375, |
| "logps/chosen": -0.796875, |
| "logps/rejected": -1.8359375, |
| "loss": 0.9939, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00146484375, |
| "rewards/margins": -0.00030517578125, |
| "rewards/rejected": -0.00115966796875, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06623931623931624, |
| "grad_norm": 52.5845011579458, |
| "learning_rate": 4.617021276595744e-07, |
| "logits/chosen": -0.2021484375, |
| "logits/rejected": -0.228515625, |
| "logps/chosen": -0.671875, |
| "logps/rejected": -0.89453125, |
| "loss": 1.0049, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.003692626953125, |
| "rewards/margins": 0.00396728515625, |
| "rewards/rejected": -0.007659912109375, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.06837606837606838, |
| "grad_norm": 49.712522346335234, |
| "learning_rate": 4.7659574468085105e-07, |
| "logits/chosen": -0.34765625, |
| "logits/rejected": -0.40234375, |
| "logps/chosen": -0.53125, |
| "logps/rejected": -0.609375, |
| "loss": 1.0063, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0009765625, |
| "rewards/margins": 0.0025634765625, |
| "rewards/rejected": -0.0035400390625, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.07051282051282051, |
| "grad_norm": 38.46690827922013, |
| "learning_rate": 4.914893617021277e-07, |
| "logits/chosen": -0.365234375, |
| "logits/rejected": -0.322265625, |
| "logps/chosen": -0.5234375, |
| "logps/rejected": -0.50390625, |
| "loss": 0.9963, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.000701904296875, |
| "rewards/margins": 0.0001220703125, |
| "rewards/rejected": -0.000823974609375, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.07264957264957266, |
| "grad_norm": 92.23984823750516, |
| "learning_rate": 5.063829787234043e-07, |
| "logits/chosen": -0.294921875, |
| "logits/rejected": -0.1875, |
| "logps/chosen": -0.5546875, |
| "logps/rejected": -0.453125, |
| "loss": 1.0079, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.0018463134765625, |
| "rewards/margins": 0.0005645751953125, |
| "rewards/rejected": -0.002410888671875, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.07478632478632478, |
| "grad_norm": 22.203815257535165, |
| "learning_rate": 5.212765957446808e-07, |
| "logits/chosen": -0.4921875, |
| "logits/rejected": -0.546875, |
| "logps/chosen": -0.75, |
| "logps/rejected": -0.6953125, |
| "loss": 0.9875, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.000701904296875, |
| "rewards/margins": 0.004669189453125, |
| "rewards/rejected": -0.00537109375, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.07692307692307693, |
| "grad_norm": 64.42185662501916, |
| "learning_rate": 5.361702127659574e-07, |
| "logits/chosen": -0.34375, |
| "logits/rejected": -0.236328125, |
| "logps/chosen": -1.1953125, |
| "logps/rejected": -2.15625, |
| "loss": 0.9974, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.006561279296875, |
| "rewards/margins": 0.0133056640625, |
| "rewards/rejected": -0.0198974609375, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.07905982905982906, |
| "grad_norm": 57.75280760864894, |
| "learning_rate": 5.51063829787234e-07, |
| "logits/chosen": -0.34765625, |
| "logits/rejected": -0.298828125, |
| "logps/chosen": -1.1953125, |
| "logps/rejected": -0.75390625, |
| "loss": 0.9971, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.005584716796875, |
| "rewards/margins": -0.002410888671875, |
| "rewards/rejected": -0.003173828125, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.0811965811965812, |
| "grad_norm": 50.74349404882503, |
| "learning_rate": 5.659574468085106e-07, |
| "logits/chosen": -0.416015625, |
| "logits/rejected": -0.3203125, |
| "logps/chosen": -1.0, |
| "logps/rejected": -0.765625, |
| "loss": 0.9863, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.000762939453125, |
| "rewards/margins": -0.0001220703125, |
| "rewards/rejected": -0.000640869140625, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.08333333333333333, |
| "grad_norm": 65.63535736055714, |
| "learning_rate": 5.808510638297872e-07, |
| "logits/chosen": -0.5390625, |
| "logits/rejected": -0.52734375, |
| "logps/chosen": -0.7734375, |
| "logps/rejected": -0.65234375, |
| "loss": 1.0226, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.005859375, |
| "rewards/margins": -0.00067138671875, |
| "rewards/rejected": -0.00518798828125, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.08547008547008547, |
| "grad_norm": 117.44282894336358, |
| "learning_rate": 5.957446808510638e-07, |
| "logits/chosen": -0.359375, |
| "logits/rejected": -0.37109375, |
| "logps/chosen": -1.0, |
| "logps/rejected": -0.5859375, |
| "loss": 1.022, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.00732421875, |
| "rewards/margins": -0.00384521484375, |
| "rewards/rejected": -0.00347900390625, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0876068376068376, |
| "grad_norm": 41.851193269641925, |
| "learning_rate": 6.106382978723404e-07, |
| "logits/chosen": -0.4609375, |
| "logits/rejected": -0.38671875, |
| "logps/chosen": -0.73828125, |
| "logps/rejected": -0.73828125, |
| "loss": 0.9847, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.006866455078125, |
| "rewards/margins": 0.0005645751953125, |
| "rewards/rejected": -0.0074462890625, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.08974358974358974, |
| "grad_norm": 28.730097925755693, |
| "learning_rate": 6.255319148936169e-07, |
| "logits/chosen": -0.515625, |
| "logits/rejected": -0.52734375, |
| "logps/chosen": -0.6328125, |
| "logps/rejected": -0.5703125, |
| "loss": 0.976, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": 0.00238037109375, |
| "rewards/margins": 0.006927490234375, |
| "rewards/rejected": -0.004547119140625, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09188034188034189, |
| "grad_norm": 66.6883676221711, |
| "learning_rate": 6.404255319148935e-07, |
| "logits/chosen": -0.0274658203125, |
| "logits/rejected": -0.09765625, |
| "logps/chosen": -2.734375, |
| "logps/rejected": -1.9453125, |
| "loss": 0.9933, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": 0.00201416015625, |
| "rewards/margins": -0.0035400390625, |
| "rewards/rejected": 0.005584716796875, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.09401709401709402, |
| "grad_norm": 28.116383690151462, |
| "learning_rate": 6.553191489361702e-07, |
| "logits/chosen": -0.75, |
| "logits/rejected": -0.63671875, |
| "logps/chosen": -1.6875, |
| "logps/rejected": -1.4296875, |
| "loss": 0.9578, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": 0.029052734375, |
| "rewards/margins": 0.017333984375, |
| "rewards/rejected": 0.01171875, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.09615384615384616, |
| "grad_norm": 24.400107414943964, |
| "learning_rate": 6.702127659574469e-07, |
| "logits/chosen": -0.431640625, |
| "logits/rejected": -0.375, |
| "logps/chosen": -0.71484375, |
| "logps/rejected": -1.28125, |
| "loss": 0.9868, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.00921630859375, |
| "rewards/margins": 0.01202392578125, |
| "rewards/rejected": -0.021240234375, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.09829059829059829, |
| "grad_norm": 28.812916665898577, |
| "learning_rate": 6.851063829787234e-07, |
| "logits/chosen": -0.431640625, |
| "logits/rejected": -0.42578125, |
| "logps/chosen": -0.8359375, |
| "logps/rejected": -0.81640625, |
| "loss": 0.9963, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": 0.01171875, |
| "rewards/margins": 0.003814697265625, |
| "rewards/rejected": 0.0079345703125, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.10042735042735043, |
| "grad_norm": 12.920574377710937, |
| "learning_rate": 7e-07, |
| "logits/chosen": -0.421875, |
| "logits/rejected": -0.294921875, |
| "logps/chosen": -0.71875, |
| "logps/rejected": -0.7421875, |
| "loss": 0.9788, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.003753662109375, |
| "rewards/margins": 0.00128173828125, |
| "rewards/rejected": -0.005035400390625, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.10256410256410256, |
| "grad_norm": 25.071111837784507, |
| "learning_rate": 6.999902552301362e-07, |
| "logits/chosen": -0.359375, |
| "logits/rejected": -0.365234375, |
| "logps/chosen": -1.2109375, |
| "logps/rejected": -1.5625, |
| "loss": 0.9839, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.01171875, |
| "rewards/margins": 0.0101318359375, |
| "rewards/rejected": 0.0015869140625, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.1047008547008547, |
| "grad_norm": 16.266651913214233, |
| "learning_rate": 6.999610214631767e-07, |
| "logits/chosen": -0.56640625, |
| "logits/rejected": -0.5546875, |
| "logps/chosen": -0.9140625, |
| "logps/rejected": -0.765625, |
| "loss": 0.9821, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00323486328125, |
| "rewards/margins": 0.00689697265625, |
| "rewards/rejected": -0.0101318359375, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.10683760683760683, |
| "grad_norm": 33.0881655418136, |
| "learning_rate": 6.999123003269862e-07, |
| "logits/chosen": -0.2119140625, |
| "logits/rejected": -0.1875, |
| "logps/chosen": -1.03125, |
| "logps/rejected": -1.1953125, |
| "loss": 0.9573, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.00762939453125, |
| "rewards/margins": 0.009765625, |
| "rewards/rejected": -0.002105712890625, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.10897435897435898, |
| "grad_norm": 147.71801090104262, |
| "learning_rate": 6.998440945345717e-07, |
| "logits/chosen": -0.48828125, |
| "logits/rejected": -0.5390625, |
| "logps/chosen": -1.125, |
| "logps/rejected": -1.4296875, |
| "loss": 1.0513, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.0341796875, |
| "rewards/margins": 0.06103515625, |
| "rewards/rejected": -0.02685546875, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 60.17292781809932, |
| "learning_rate": 6.99756407883932e-07, |
| "logits/chosen": -0.322265625, |
| "logits/rejected": -0.43359375, |
| "logps/chosen": -0.73046875, |
| "logps/rejected": -1.078125, |
| "loss": 1.0376, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.01129150390625, |
| "rewards/margins": 0.015625, |
| "rewards/rejected": -0.00439453125, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.11324786324786325, |
| "grad_norm": 36.38997121230995, |
| "learning_rate": 6.996492452578456e-07, |
| "logits/chosen": -0.6171875, |
| "logits/rejected": -0.64453125, |
| "logps/chosen": -0.62109375, |
| "logps/rejected": -0.58203125, |
| "loss": 1.0084, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00213623046875, |
| "rewards/margins": 0.003662109375, |
| "rewards/rejected": -0.00579833984375, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.11538461538461539, |
| "grad_norm": 22.671705161068385, |
| "learning_rate": 6.995226126235988e-07, |
| "logits/chosen": -0.5390625, |
| "logits/rejected": -0.4765625, |
| "logps/chosen": -0.68359375, |
| "logps/rejected": -0.71484375, |
| "loss": 0.9937, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.0096435546875, |
| "rewards/margins": 0.00054931640625, |
| "rewards/rejected": 0.00909423828125, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.11752136752136752, |
| "grad_norm": 61.48411418434429, |
| "learning_rate": 6.993765170326537e-07, |
| "logits/chosen": -0.380859375, |
| "logits/rejected": -0.3828125, |
| "logps/chosen": -0.8671875, |
| "logps/rejected": -0.6875, |
| "loss": 1.0205, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.007232666015625, |
| "rewards/margins": -0.00274658203125, |
| "rewards/rejected": -0.004486083984375, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.11965811965811966, |
| "grad_norm": 154.53160691093754, |
| "learning_rate": 6.992109666202556e-07, |
| "logits/chosen": -0.375, |
| "logits/rejected": -0.322265625, |
| "logps/chosen": -0.9609375, |
| "logps/rejected": -0.890625, |
| "loss": 1.026, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.01220703125, |
| "rewards/margins": 0.00152587890625, |
| "rewards/rejected": -0.0137939453125, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.12179487179487179, |
| "grad_norm": 29.076806117840395, |
| "learning_rate": 6.990259706049799e-07, |
| "logits/chosen": -0.30078125, |
| "logits/rejected": -0.2451171875, |
| "logps/chosen": -2.265625, |
| "logps/rejected": -1.6640625, |
| "loss": 0.9692, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.03466796875, |
| "rewards/margins": 0.035400390625, |
| "rewards/rejected": -0.06982421875, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.12393162393162394, |
| "grad_norm": 70.26911941495058, |
| "learning_rate": 6.988215392882183e-07, |
| "logits/chosen": -0.328125, |
| "logits/rejected": -0.30859375, |
| "logps/chosen": -0.671875, |
| "logps/rejected": -0.6640625, |
| "loss": 0.9807, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.005706787109375, |
| "rewards/margins": -0.0015869140625, |
| "rewards/rejected": -0.004119873046875, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.12606837606837606, |
| "grad_norm": 20.11374097217621, |
| "learning_rate": 6.985976840536061e-07, |
| "logits/chosen": -0.69140625, |
| "logits/rejected": -0.58984375, |
| "logps/chosen": -0.490234375, |
| "logps/rejected": -0.44921875, |
| "loss": 0.9724, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.007080078125, |
| "rewards/margins": -0.0045166015625, |
| "rewards/rejected": -0.0025634765625, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.1282051282051282, |
| "grad_norm": 111.39066614610299, |
| "learning_rate": 6.983544173663875e-07, |
| "logits/chosen": -0.224609375, |
| "logits/rejected": -0.2216796875, |
| "logps/chosen": -0.90234375, |
| "logps/rejected": -1.1015625, |
| "loss": 1.0992, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.0238037109375, |
| "rewards/margins": 0.03466796875, |
| "rewards/rejected": -0.058349609375, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.13034188034188035, |
| "grad_norm": 25.562904676632012, |
| "learning_rate": 6.980917527727217e-07, |
| "logits/chosen": -0.3984375, |
| "logits/rejected": -0.34765625, |
| "logps/chosen": -1.0390625, |
| "logps/rejected": -1.140625, |
| "loss": 0.984, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.028076171875, |
| "rewards/margins": 0.01080322265625, |
| "rewards/rejected": -0.0390625, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.13247863247863248, |
| "grad_norm": 48.932976513817344, |
| "learning_rate": 6.978097048989288e-07, |
| "logits/chosen": -0.7578125, |
| "logits/rejected": -0.6875, |
| "logps/chosen": -1.046875, |
| "logps/rejected": -1.203125, |
| "loss": 1.0165, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.00921630859375, |
| "rewards/margins": 0.00946044921875, |
| "rewards/rejected": -0.0186767578125, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1346153846153846, |
| "grad_norm": 20.033557361846213, |
| "learning_rate": 6.975082894506753e-07, |
| "logits/chosen": -0.380859375, |
| "logits/rejected": -0.3359375, |
| "logps/chosen": -0.78125, |
| "logps/rejected": -0.66796875, |
| "loss": 1.0021, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.01055908203125, |
| "rewards/margins": -0.00372314453125, |
| "rewards/rejected": -0.0068359375, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.13675213675213677, |
| "grad_norm": 19.905545916166517, |
| "learning_rate": 6.971875232120994e-07, |
| "logits/chosen": -0.49609375, |
| "logits/rejected": -0.388671875, |
| "logps/chosen": -0.62890625, |
| "logps/rejected": -0.5703125, |
| "loss": 0.9924, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.009033203125, |
| "rewards/margins": -0.0011138916015625, |
| "rewards/rejected": -0.0079345703125, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.1388888888888889, |
| "grad_norm": 26.422482495942226, |
| "learning_rate": 6.968474240448763e-07, |
| "logits/chosen": -0.435546875, |
| "logits/rejected": -0.4296875, |
| "logps/chosen": -0.94140625, |
| "logps/rejected": -1.0703125, |
| "loss": 0.98, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.015625, |
| "rewards/margins": 0.00750732421875, |
| "rewards/rejected": -0.023193359375, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.14102564102564102, |
| "grad_norm": 68.78295228867675, |
| "learning_rate": 6.964880108872238e-07, |
| "logits/chosen": -0.515625, |
| "logits/rejected": -0.515625, |
| "logps/chosen": -0.53515625, |
| "logps/rejected": -0.546875, |
| "loss": 1.0587, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.00238037109375, |
| "rewards/margins": 0.004364013671875, |
| "rewards/rejected": -0.006744384765625, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.14316239316239315, |
| "grad_norm": 19.507340401891387, |
| "learning_rate": 6.961093037528475e-07, |
| "logits/chosen": -0.546875, |
| "logits/rejected": -0.546875, |
| "logps/chosen": -0.734375, |
| "logps/rejected": -1.4140625, |
| "loss": 0.9944, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.01116943359375, |
| "rewards/margins": 0.0048828125, |
| "rewards/rejected": -0.01611328125, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.1452991452991453, |
| "grad_norm": 31.79134213963872, |
| "learning_rate": 6.957113237298269e-07, |
| "logits/chosen": -0.373046875, |
| "logits/rejected": -0.306640625, |
| "logps/chosen": -2.09375, |
| "logps/rejected": -3.15625, |
| "loss": 0.976, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.056640625, |
| "rewards/margins": 0.0146484375, |
| "rewards/rejected": -0.0712890625, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.14743589743589744, |
| "grad_norm": 49.21677800952418, |
| "learning_rate": 6.952940929794406e-07, |
| "logits/chosen": -0.365234375, |
| "logits/rejected": -0.296875, |
| "logps/chosen": -1.046875, |
| "logps/rejected": -0.99609375, |
| "loss": 1.0469, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.026123046875, |
| "rewards/margins": 0.0133056640625, |
| "rewards/rejected": -0.03955078125, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.14957264957264957, |
| "grad_norm": 29.943057230035965, |
| "learning_rate": 6.948576347349319e-07, |
| "logits/chosen": -0.44921875, |
| "logits/rejected": -0.470703125, |
| "logps/chosen": -0.53515625, |
| "logps/rejected": -0.494140625, |
| "loss": 0.965, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": 0.000335693359375, |
| "rewards/margins": 0.006011962890625, |
| "rewards/rejected": -0.00567626953125, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1517094017094017, |
| "grad_norm": 15.86774723874277, |
| "learning_rate": 6.944019733002163e-07, |
| "logits/chosen": -0.625, |
| "logits/rejected": -0.5390625, |
| "logps/chosen": -0.75, |
| "logps/rejected": -0.80078125, |
| "loss": 0.9816, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.013427734375, |
| "rewards/margins": 0.003936767578125, |
| "rewards/rejected": 0.00946044921875, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.15384615384615385, |
| "grad_norm": 27.561572561189063, |
| "learning_rate": 6.939271340485266e-07, |
| "logits/chosen": -0.5703125, |
| "logits/rejected": -0.64453125, |
| "logps/chosen": -0.5625, |
| "logps/rejected": -0.6171875, |
| "loss": 0.9576, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.0047607421875, |
| "rewards/margins": 0.00244140625, |
| "rewards/rejected": 0.0023193359375, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.15598290598290598, |
| "grad_norm": 139.7151826401799, |
| "learning_rate": 6.934331434210018e-07, |
| "logits/chosen": -0.54296875, |
| "logits/rejected": -0.494140625, |
| "logps/chosen": -0.55859375, |
| "logps/rejected": -1.171875, |
| "loss": 1.1119, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.005950927734375, |
| "rewards/margins": -0.048828125, |
| "rewards/rejected": 0.0546875, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.1581196581196581, |
| "grad_norm": 26.428371794410545, |
| "learning_rate": 6.929200289252127e-07, |
| "logits/chosen": -0.66796875, |
| "logits/rejected": -0.59375, |
| "logps/chosen": -1.625, |
| "logps/rejected": -1.96875, |
| "loss": 0.9962, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": 0.037109375, |
| "rewards/margins": -0.00714111328125, |
| "rewards/rejected": 0.04443359375, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.16025641025641027, |
| "grad_norm": 71.85447334397465, |
| "learning_rate": 6.923878191336319e-07, |
| "logits/chosen": -0.4609375, |
| "logits/rejected": -0.427734375, |
| "logps/chosen": -0.80859375, |
| "logps/rejected": -1.2109375, |
| "loss": 1.0996, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.000457763671875, |
| "rewards/margins": -0.05029296875, |
| "rewards/rejected": 0.05078125, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1623931623931624, |
| "grad_norm": 38.858782907246336, |
| "learning_rate": 6.918365436820421e-07, |
| "logits/chosen": -0.29296875, |
| "logits/rejected": -0.2431640625, |
| "logps/chosen": -0.71875, |
| "logps/rejected": -0.87109375, |
| "loss": 1.0227, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0166015625, |
| "rewards/margins": -0.003326416015625, |
| "rewards/rejected": -0.0133056640625, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.16452991452991453, |
| "grad_norm": 10.583889406197391, |
| "learning_rate": 6.912662332678855e-07, |
| "logits/chosen": -0.515625, |
| "logits/rejected": -0.49609375, |
| "logps/chosen": -0.81640625, |
| "logps/rejected": -0.86328125, |
| "loss": 0.9626, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.01220703125, |
| "rewards/margins": 0.01300048828125, |
| "rewards/rejected": -0.0252685546875, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 44.75517764964221, |
| "learning_rate": 6.906769196485548e-07, |
| "logits/chosen": -0.6171875, |
| "logits/rejected": -0.65625, |
| "logps/chosen": -1.21875, |
| "logps/rejected": -1.703125, |
| "loss": 0.9673, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.07861328125, |
| "rewards/margins": 0.057373046875, |
| "rewards/rejected": 0.021240234375, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.16880341880341881, |
| "grad_norm": 35.27372933220996, |
| "learning_rate": 6.90068635639625e-07, |
| "logits/chosen": -0.484375, |
| "logits/rejected": -0.373046875, |
| "logps/chosen": -1.5625, |
| "logps/rejected": -1.046875, |
| "loss": 0.988, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.0152587890625, |
| "rewards/margins": 0.021240234375, |
| "rewards/rejected": -0.006011962890625, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.17094017094017094, |
| "grad_norm": 61.73040358186559, |
| "learning_rate": 6.894414151130255e-07, |
| "logits/chosen": -0.2451171875, |
| "logits/rejected": -0.19140625, |
| "logps/chosen": -0.76953125, |
| "logps/rejected": -1.125, |
| "loss": 0.9987, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00714111328125, |
| "rewards/margins": 0.0213623046875, |
| "rewards/rejected": -0.028564453125, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.17307692307692307, |
| "grad_norm": 30.95188040932688, |
| "learning_rate": 6.887952929951547e-07, |
| "logits/chosen": -0.478515625, |
| "logits/rejected": -0.37890625, |
| "logps/chosen": -0.7890625, |
| "logps/rejected": -0.88671875, |
| "loss": 1.0059, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.007720947265625, |
| "rewards/margins": 0.00811767578125, |
| "rewards/rejected": -0.015869140625, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.1752136752136752, |
| "grad_norm": 57.26839307731332, |
| "learning_rate": 6.881303052649344e-07, |
| "logits/chosen": -0.6171875, |
| "logits/rejected": -0.56640625, |
| "logps/chosen": -0.5234375, |
| "logps/rejected": -0.56640625, |
| "loss": 1.0756, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.00421142578125, |
| "rewards/margins": 0.00244140625, |
| "rewards/rejected": 0.00177001953125, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.17735042735042736, |
| "grad_norm": 44.65238337963925, |
| "learning_rate": 6.87446488951807e-07, |
| "logits/chosen": -0.43359375, |
| "logits/rejected": -0.44140625, |
| "logps/chosen": -0.65234375, |
| "logps/rejected": -1.4296875, |
| "loss": 0.9998, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0040283203125, |
| "rewards/margins": 0.0008544921875, |
| "rewards/rejected": -0.0048828125, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.1794871794871795, |
| "grad_norm": 31.057443291067262, |
| "learning_rate": 6.867438821336729e-07, |
| "logits/chosen": -0.421875, |
| "logits/rejected": -0.423828125, |
| "logps/chosen": -0.671875, |
| "logps/rejected": -1.59375, |
| "loss": 0.9746, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.00225830078125, |
| "rewards/margins": 0.0751953125, |
| "rewards/rejected": -0.0771484375, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.18162393162393162, |
| "grad_norm": 92.40532740478336, |
| "learning_rate": 6.860225239347707e-07, |
| "logits/chosen": -0.353515625, |
| "logits/rejected": -0.26171875, |
| "logps/chosen": -1.140625, |
| "logps/rejected": -0.96875, |
| "loss": 1.014, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.037109375, |
| "rewards/margins": 0.05029296875, |
| "rewards/rejected": -0.01336669921875, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.18376068376068377, |
| "grad_norm": 17.92936798357233, |
| "learning_rate": 6.852824545234985e-07, |
| "logits/chosen": -0.50390625, |
| "logits/rejected": -0.470703125, |
| "logps/chosen": -0.5078125, |
| "logps/rejected": -0.52734375, |
| "loss": 0.9742, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.000732421875, |
| "rewards/margins": 0.004791259765625, |
| "rewards/rejected": -0.004058837890625, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.1858974358974359, |
| "grad_norm": 20.80379260553499, |
| "learning_rate": 6.845237151101767e-07, |
| "logits/chosen": -0.228515625, |
| "logits/rejected": 0.023681640625, |
| "logps/chosen": -0.65234375, |
| "logps/rejected": -0.72265625, |
| "loss": 0.9644, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 3.0517578125e-05, |
| "rewards/margins": 0.0167236328125, |
| "rewards/rejected": -0.0167236328125, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.18803418803418803, |
| "grad_norm": 72.09254799159736, |
| "learning_rate": 6.837463479447537e-07, |
| "logits/chosen": -0.31640625, |
| "logits/rejected": -0.26953125, |
| "logps/chosen": -1.1171875, |
| "logps/rejected": -2.8125, |
| "loss": 0.9951, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.00970458984375, |
| "rewards/margins": 0.11376953125, |
| "rewards/rejected": -0.12353515625, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.19017094017094016, |
| "grad_norm": 20.176800812028358, |
| "learning_rate": 6.829503963144531e-07, |
| "logits/chosen": -0.4140625, |
| "logits/rejected": -0.30859375, |
| "logps/chosen": -0.85546875, |
| "logps/rejected": -0.72265625, |
| "loss": 1.0018, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.0087890625, |
| "rewards/margins": -0.00335693359375, |
| "rewards/rejected": -0.00543212890625, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.19230769230769232, |
| "grad_norm": 57.87970181056306, |
| "learning_rate": 6.821359045413631e-07, |
| "logits/chosen": -0.50390625, |
| "logits/rejected": -0.44921875, |
| "logps/chosen": -0.86328125, |
| "logps/rejected": -0.86328125, |
| "loss": 1.0153, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": 0.00054931640625, |
| "rewards/margins": 0.0010986328125, |
| "rewards/rejected": -0.00054931640625, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.19444444444444445, |
| "grad_norm": 49.16480618396826, |
| "learning_rate": 6.813029179799691e-07, |
| "logits/chosen": -0.44921875, |
| "logits/rejected": -0.451171875, |
| "logps/chosen": -0.62890625, |
| "logps/rejected": -1.3828125, |
| "loss": 1.0355, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": 0.001312255859375, |
| "rewards/margins": 0.02392578125, |
| "rewards/rejected": -0.022705078125, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.19658119658119658, |
| "grad_norm": 33.2077502398859, |
| "learning_rate": 6.804514830146271e-07, |
| "logits/chosen": -0.61328125, |
| "logits/rejected": -0.56640625, |
| "logps/chosen": -0.671875, |
| "logps/rejected": -0.71875, |
| "loss": 0.9918, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.00201416015625, |
| "rewards/margins": 0.01336669921875, |
| "rewards/rejected": -0.0113525390625, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.1987179487179487, |
| "grad_norm": 33.15696851492135, |
| "learning_rate": 6.795816470569815e-07, |
| "logits/chosen": -0.46484375, |
| "logits/rejected": -0.40234375, |
| "logps/chosen": -0.6875, |
| "logps/rejected": -1.25, |
| "loss": 0.988, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.0029296875, |
| "rewards/margins": 0.0888671875, |
| "rewards/rejected": -0.0859375, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.20085470085470086, |
| "grad_norm": 33.760745328662495, |
| "learning_rate": 6.786934585433253e-07, |
| "logits/chosen": -0.44140625, |
| "logits/rejected": -0.43359375, |
| "logps/chosen": -0.6484375, |
| "logps/rejected": -0.6328125, |
| "loss": 0.9528, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.004486083984375, |
| "rewards/margins": 0.0040283203125, |
| "rewards/rejected": -0.008544921875, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.202991452991453, |
| "grad_norm": 16.210619092434975, |
| "learning_rate": 6.777869669319021e-07, |
| "logits/chosen": -0.34765625, |
| "logits/rejected": -0.341796875, |
| "logps/chosen": -0.5078125, |
| "logps/rejected": -0.57421875, |
| "loss": 0.9913, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.007598876953125, |
| "rewards/margins": 0.004486083984375, |
| "rewards/rejected": 0.00311279296875, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.20512820512820512, |
| "grad_norm": 28.57830685419194, |
| "learning_rate": 6.768622227001528e-07, |
| "logits/chosen": -0.69140625, |
| "logits/rejected": -0.453125, |
| "logps/chosen": -0.7734375, |
| "logps/rejected": -0.671875, |
| "loss": 0.9589, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.00457763671875, |
| "rewards/margins": 0.01177978515625, |
| "rewards/rejected": -0.016357421875, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.20726495726495728, |
| "grad_norm": 54.39294051177383, |
| "learning_rate": 6.759192773419042e-07, |
| "logits/chosen": -0.58984375, |
| "logits/rejected": -0.54296875, |
| "logps/chosen": -1.046875, |
| "logps/rejected": -1.265625, |
| "loss": 1.0544, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00372314453125, |
| "rewards/margins": -0.00537109375, |
| "rewards/rejected": 0.00164794921875, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.2094017094017094, |
| "grad_norm": 59.836687557700564, |
| "learning_rate": 6.749581833645022e-07, |
| "logits/chosen": -0.578125, |
| "logits/rejected": -0.5390625, |
| "logps/chosen": -1.6171875, |
| "logps/rejected": -0.65625, |
| "loss": 1.0487, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.019287109375, |
| "rewards/margins": -0.0028076171875, |
| "rewards/rejected": -0.016357421875, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.21153846153846154, |
| "grad_norm": 37.8780536986917, |
| "learning_rate": 6.739789942858876e-07, |
| "logits/chosen": -0.365234375, |
| "logits/rejected": -0.375, |
| "logps/chosen": -0.89453125, |
| "logps/rejected": -0.76171875, |
| "loss": 0.9814, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.0108642578125, |
| "rewards/margins": 0.002716064453125, |
| "rewards/rejected": 0.00811767578125, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.21367521367521367, |
| "grad_norm": 29.168248507387297, |
| "learning_rate": 6.729817646316158e-07, |
| "logits/chosen": -0.380859375, |
| "logits/rejected": -0.408203125, |
| "logps/chosen": -2.015625, |
| "logps/rejected": -2.125, |
| "loss": 0.9626, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.0263671875, |
| "rewards/margins": 0.016845703125, |
| "rewards/rejected": 0.0096435546875, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.21367521367521367, |
| "eval_logits/chosen": -0.50390625, |
| "eval_logits/rejected": -0.48828125, |
| "eval_logps/chosen": -1.0078125, |
| "eval_logps/rejected": -1.0390625, |
| "eval_loss": 1.00387442111969, |
| "eval_rewards/accuracies": 0.6290322542190552, |
| "eval_rewards/chosen": 0.0093994140625, |
| "eval_rewards/margins": 0.031982421875, |
| "eval_rewards/rejected": -0.0225830078125, |
| "eval_runtime": 105.2748, |
| "eval_samples_per_second": 18.627, |
| "eval_steps_per_second": 0.589, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.21581196581196582, |
| "grad_norm": 40.180093476954596, |
| "learning_rate": 6.719665499318211e-07, |
| "logits/chosen": -0.62890625, |
| "logits/rejected": -0.53515625, |
| "logps/chosen": -0.57421875, |
| "logps/rejected": -0.98046875, |
| "loss": 0.9673, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.0015869140625, |
| "rewards/margins": 0.0302734375, |
| "rewards/rejected": -0.02880859375, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.21794871794871795, |
| "grad_norm": 20.943412477220253, |
| "learning_rate": 6.709334067181241e-07, |
| "logits/chosen": -0.462890625, |
| "logits/rejected": -0.39453125, |
| "logps/chosen": -1.1875, |
| "logps/rejected": -1.359375, |
| "loss": 0.9623, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.003814697265625, |
| "rewards/margins": 0.03271484375, |
| "rewards/rejected": -0.029052734375, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.22008547008547008, |
| "grad_norm": 31.399197614010006, |
| "learning_rate": 6.69882392520484e-07, |
| "logits/chosen": -0.365234375, |
| "logits/rejected": -0.44140625, |
| "logps/chosen": -1.4140625, |
| "logps/rejected": -0.71484375, |
| "loss": 1.0045, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0361328125, |
| "rewards/margins": -0.029541015625, |
| "rewards/rejected": -0.00665283203125, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 26.799960542003504, |
| "learning_rate": 6.688135658639948e-07, |
| "logits/chosen": -0.400390625, |
| "logits/rejected": -0.40625, |
| "logps/chosen": -0.69140625, |
| "logps/rejected": -0.640625, |
| "loss": 0.9724, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.00494384765625, |
| "rewards/margins": -0.00189208984375, |
| "rewards/rejected": -0.0030517578125, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.22435897435897437, |
| "grad_norm": 25.35218941711545, |
| "learning_rate": 6.677269862656269e-07, |
| "logits/chosen": -0.39453125, |
| "logits/rejected": -0.341796875, |
| "logps/chosen": -0.875, |
| "logps/rejected": -1.0390625, |
| "loss": 0.9951, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.019287109375, |
| "rewards/margins": -0.0010833740234375, |
| "rewards/rejected": -0.0181884765625, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.2264957264957265, |
| "grad_norm": 26.17479347283922, |
| "learning_rate": 6.666227142309125e-07, |
| "logits/chosen": -0.5234375, |
| "logits/rejected": -0.46484375, |
| "logps/chosen": -0.640625, |
| "logps/rejected": -1.59375, |
| "loss": 0.9584, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.008056640625, |
| "rewards/margins": 0.005126953125, |
| "rewards/rejected": 0.0029296875, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.22863247863247863, |
| "grad_norm": 28.773921806862447, |
| "learning_rate": 6.655008112505764e-07, |
| "logits/chosen": -0.37109375, |
| "logits/rejected": -0.341796875, |
| "logps/chosen": -0.5, |
| "logps/rejected": -0.97265625, |
| "loss": 0.9639, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.006195068359375, |
| "rewards/margins": 0.055419921875, |
| "rewards/rejected": -0.061279296875, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.23076923076923078, |
| "grad_norm": 11.104751430129355, |
| "learning_rate": 6.643613397971118e-07, |
| "logits/chosen": -0.515625, |
| "logits/rejected": -0.5234375, |
| "logps/chosen": -0.6796875, |
| "logps/rejected": -0.72265625, |
| "loss": 0.9629, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.01544189453125, |
| "rewards/margins": 0.00067138671875, |
| "rewards/rejected": 0.0147705078125, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.2329059829059829, |
| "grad_norm": 39.285256444884, |
| "learning_rate": 6.632043633213024e-07, |
| "logits/chosen": -0.5703125, |
| "logits/rejected": -0.55078125, |
| "logps/chosen": -0.68359375, |
| "logps/rejected": -0.70703125, |
| "loss": 1.0209, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0003662109375, |
| "rewards/margins": 0.01263427734375, |
| "rewards/rejected": -0.01300048828125, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.23504273504273504, |
| "grad_norm": 14.705470908694043, |
| "learning_rate": 6.620299462486878e-07, |
| "logits/chosen": -0.45703125, |
| "logits/rejected": -0.328125, |
| "logps/chosen": -0.703125, |
| "logps/rejected": -0.640625, |
| "loss": 0.9722, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.0147705078125, |
| "rewards/margins": 0.0093994140625, |
| "rewards/rejected": 0.00537109375, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.23717948717948717, |
| "grad_norm": 45.43856755416852, |
| "learning_rate": 6.608381539759773e-07, |
| "logits/chosen": -0.5859375, |
| "logits/rejected": -0.58984375, |
| "logps/chosen": -1.4609375, |
| "logps/rejected": -2.671875, |
| "loss": 0.9984, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0208740234375, |
| "rewards/margins": 0.0400390625, |
| "rewards/rejected": -0.060546875, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.23931623931623933, |
| "grad_norm": 40.396012997379934, |
| "learning_rate": 6.596290528674075e-07, |
| "logits/chosen": -0.47265625, |
| "logits/rejected": -0.4921875, |
| "logps/chosen": -1.75, |
| "logps/rejected": -0.828125, |
| "loss": 1.0299, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.034423828125, |
| "rewards/margins": -0.025146484375, |
| "rewards/rejected": -0.00933837890625, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.24145299145299146, |
| "grad_norm": 47.741295163046054, |
| "learning_rate": 6.584027102510475e-07, |
| "logits/chosen": -0.28515625, |
| "logits/rejected": -0.275390625, |
| "logps/chosen": -0.8671875, |
| "logps/rejected": -0.79296875, |
| "loss": 1.0162, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": 0.00927734375, |
| "rewards/margins": -0.00146484375, |
| "rewards/rejected": 0.0107421875, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.24358974358974358, |
| "grad_norm": 18.775881941823393, |
| "learning_rate": 6.57159194415049e-07, |
| "logits/chosen": -0.52734375, |
| "logits/rejected": -0.42578125, |
| "logps/chosen": -0.5078125, |
| "logps/rejected": -0.5078125, |
| "loss": 0.9446, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.00067138671875, |
| "rewards/margins": 0.007568359375, |
| "rewards/rejected": -0.00689697265625, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.24572649572649571, |
| "grad_norm": 74.2807011818759, |
| "learning_rate": 6.558985746038441e-07, |
| "logits/chosen": -0.3984375, |
| "logits/rejected": -0.42578125, |
| "logps/chosen": -1.375, |
| "logps/rejected": -2.8125, |
| "loss": 1.047, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.002960205078125, |
| "rewards/margins": -0.0458984375, |
| "rewards/rejected": 0.048828125, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.24786324786324787, |
| "grad_norm": 48.801715754922114, |
| "learning_rate": 6.546209210142898e-07, |
| "logits/chosen": -0.50390625, |
| "logits/rejected": -0.359375, |
| "logps/chosen": -0.80859375, |
| "logps/rejected": -1.515625, |
| "loss": 1.0457, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.03515625, |
| "rewards/margins": -0.0283203125, |
| "rewards/rejected": -0.00689697265625, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 80.28448840086726, |
| "learning_rate": 6.533263047917585e-07, |
| "logits/chosen": -0.53515625, |
| "logits/rejected": -0.54296875, |
| "logps/chosen": -1.1484375, |
| "logps/rejected": -1.46875, |
| "loss": 1.0875, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.00018310546875, |
| "rewards/margins": -0.019775390625, |
| "rewards/rejected": 0.02001953125, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.25213675213675213, |
| "grad_norm": 108.99518172041073, |
| "learning_rate": 6.520147980261769e-07, |
| "logits/chosen": -0.30859375, |
| "logits/rejected": -0.2216796875, |
| "logps/chosen": -1.75, |
| "logps/rejected": -0.875, |
| "loss": 1.1354, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.08447265625, |
| "rewards/margins": -0.09326171875, |
| "rewards/rejected": 0.0093994140625, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.25427350427350426, |
| "grad_norm": 18.814589732473536, |
| "learning_rate": 6.506864737480113e-07, |
| "logits/chosen": -0.478515625, |
| "logits/rejected": -0.40625, |
| "logps/chosen": -0.66796875, |
| "logps/rejected": -0.6015625, |
| "loss": 0.948, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.00238037109375, |
| "rewards/margins": 0.01226806640625, |
| "rewards/rejected": -0.0098876953125, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "grad_norm": 27.390756219900865, |
| "learning_rate": 6.493414059242011e-07, |
| "logits/chosen": -0.6328125, |
| "logits/rejected": -0.625, |
| "logps/chosen": -0.546875, |
| "logps/rejected": -0.82421875, |
| "loss": 0.9696, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.00482177734375, |
| "rewards/margins": 0.0208740234375, |
| "rewards/rejected": -0.025634765625, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.25854700854700857, |
| "grad_norm": 23.34685170798232, |
| "learning_rate": 6.479796694540399e-07, |
| "logits/chosen": -0.6015625, |
| "logits/rejected": -0.53125, |
| "logps/chosen": -0.859375, |
| "logps/rejected": -0.90625, |
| "loss": 0.9795, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.01123046875, |
| "rewards/margins": 0.00103759765625, |
| "rewards/rejected": -0.0123291015625, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.2606837606837607, |
| "grad_norm": 40.69213923113079, |
| "learning_rate": 6.46601340165005e-07, |
| "logits/chosen": -0.578125, |
| "logits/rejected": -0.5625, |
| "logps/chosen": -0.76171875, |
| "logps/rejected": -0.87890625, |
| "loss": 0.9565, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.002777099609375, |
| "rewards/margins": 0.002105712890625, |
| "rewards/rejected": -0.0048828125, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.26282051282051283, |
| "grad_norm": 74.88918479593436, |
| "learning_rate": 6.452064948085348e-07, |
| "logits/chosen": -0.45703125, |
| "logits/rejected": -0.546875, |
| "logps/chosen": -0.8046875, |
| "logps/rejected": -1.765625, |
| "loss": 1.0458, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00103759765625, |
| "rewards/margins": -0.040771484375, |
| "rewards/rejected": 0.03955078125, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.26495726495726496, |
| "grad_norm": 22.294441111678267, |
| "learning_rate": 6.43795211055755e-07, |
| "logits/chosen": -0.498046875, |
| "logits/rejected": -0.392578125, |
| "logps/chosen": -0.6171875, |
| "logps/rejected": -0.73828125, |
| "loss": 1.0101, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.00872802734375, |
| "rewards/margins": 0.0096435546875, |
| "rewards/rejected": -0.018310546875, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.2670940170940171, |
| "grad_norm": 29.375252043352734, |
| "learning_rate": 6.423675674931533e-07, |
| "logits/chosen": -0.703125, |
| "logits/rejected": -0.7109375, |
| "logps/chosen": -0.5625, |
| "logps/rejected": -0.90234375, |
| "loss": 1.0037, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0098876953125, |
| "rewards/margins": -0.020751953125, |
| "rewards/rejected": 0.01080322265625, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2692307692307692, |
| "grad_norm": 37.173942240659194, |
| "learning_rate": 6.409236436182041e-07, |
| "logits/chosen": -0.423828125, |
| "logits/rejected": -0.384765625, |
| "logps/chosen": -0.7109375, |
| "logps/rejected": -0.77734375, |
| "loss": 0.9626, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.01416015625, |
| "rewards/margins": 0.033447265625, |
| "rewards/rejected": -0.047607421875, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.27136752136752135, |
| "grad_norm": 29.85823779609622, |
| "learning_rate": 6.394635198349408e-07, |
| "logits/chosen": -0.51171875, |
| "logits/rejected": -0.458984375, |
| "logps/chosen": -0.953125, |
| "logps/rejected": -2.265625, |
| "loss": 0.9742, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0296630859375, |
| "rewards/margins": 0.0361328125, |
| "rewards/rejected": -0.06591796875, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.27350427350427353, |
| "grad_norm": 65.30857432466394, |
| "learning_rate": 6.37987277449479e-07, |
| "logits/chosen": -0.3515625, |
| "logits/rejected": -0.369140625, |
| "logps/chosen": -0.82421875, |
| "logps/rejected": -1.4296875, |
| "loss": 0.9835, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.025634765625, |
| "rewards/margins": 0.0205078125, |
| "rewards/rejected": -0.0458984375, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.27564102564102566, |
| "grad_norm": 149.41739915395954, |
| "learning_rate": 6.364949986654889e-07, |
| "logits/chosen": -0.59375, |
| "logits/rejected": -0.51953125, |
| "logps/chosen": -0.8515625, |
| "logps/rejected": -1.1484375, |
| "loss": 1.0493, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.00823974609375, |
| "rewards/margins": 0.0205078125, |
| "rewards/rejected": -0.028564453125, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.2777777777777778, |
| "grad_norm": 37.22138619167702, |
| "learning_rate": 6.349867665796183e-07, |
| "logits/chosen": -0.5859375, |
| "logits/rejected": -0.462890625, |
| "logps/chosen": -1.0, |
| "logps/rejected": -1.953125, |
| "loss": 0.9562, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.01171875, |
| "rewards/margins": 0.04345703125, |
| "rewards/rejected": -0.055419921875, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2799145299145299, |
| "grad_norm": 28.846364966596266, |
| "learning_rate": 6.334626651768649e-07, |
| "logits/chosen": -0.287109375, |
| "logits/rejected": -0.302734375, |
| "logps/chosen": -0.6015625, |
| "logps/rejected": -0.60546875, |
| "loss": 0.9603, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.004425048828125, |
| "rewards/margins": 0.019775390625, |
| "rewards/rejected": -0.024169921875, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.28205128205128205, |
| "grad_norm": 33.885232819627916, |
| "learning_rate": 6.319227793258992e-07, |
| "logits/chosen": -0.458984375, |
| "logits/rejected": -0.4375, |
| "logps/chosen": -0.53125, |
| "logps/rejected": -0.5234375, |
| "loss": 0.968, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.003753662109375, |
| "rewards/margins": 0.0150146484375, |
| "rewards/rejected": -0.018798828125, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2841880341880342, |
| "grad_norm": 25.422079466708336, |
| "learning_rate": 6.3036719477434e-07, |
| "logits/chosen": -0.30078125, |
| "logits/rejected": -0.2421875, |
| "logps/chosen": -0.890625, |
| "logps/rejected": -0.7109375, |
| "loss": 0.9771, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.00311279296875, |
| "rewards/margins": 0.008544921875, |
| "rewards/rejected": -0.0115966796875, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.2863247863247863, |
| "grad_norm": 18.812454516360713, |
| "learning_rate": 6.287959981439785e-07, |
| "logits/chosen": -0.462890625, |
| "logits/rejected": -0.443359375, |
| "logps/chosen": -0.90625, |
| "logps/rejected": -1.1171875, |
| "loss": 0.9211, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.0072021484375, |
| "rewards/margins": 0.05712890625, |
| "rewards/rejected": -0.064453125, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.28846153846153844, |
| "grad_norm": 14.372906903551076, |
| "learning_rate": 6.272092769259549e-07, |
| "logits/chosen": -0.65625, |
| "logits/rejected": -0.5546875, |
| "logps/chosen": -0.65625, |
| "logps/rejected": -0.65625, |
| "loss": 0.9391, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0064697265625, |
| "rewards/margins": 0.006744384765625, |
| "rewards/rejected": -0.01318359375, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.2905982905982906, |
| "grad_norm": 18.32033757466638, |
| "learning_rate": 6.256071194758872e-07, |
| "logits/chosen": -0.62109375, |
| "logits/rejected": -0.625, |
| "logps/chosen": -1.65625, |
| "logps/rejected": -2.734375, |
| "loss": 0.9296, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.0137939453125, |
| "rewards/margins": 0.03173828125, |
| "rewards/rejected": -0.01806640625, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.29273504273504275, |
| "grad_norm": 31.82635270525967, |
| "learning_rate": 6.239896150089505e-07, |
| "logits/chosen": -0.6328125, |
| "logits/rejected": -0.5703125, |
| "logps/chosen": -0.71875, |
| "logps/rejected": -1.7890625, |
| "loss": 0.989, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.017333984375, |
| "rewards/margins": 0.04443359375, |
| "rewards/rejected": -0.0615234375, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.2948717948717949, |
| "grad_norm": 21.238442566798064, |
| "learning_rate": 6.223568535949091e-07, |
| "logits/chosen": -0.4296875, |
| "logits/rejected": -0.4140625, |
| "logps/chosen": -0.875, |
| "logps/rejected": -0.99609375, |
| "loss": 0.9547, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.0115966796875, |
| "rewards/margins": 0.038330078125, |
| "rewards/rejected": -0.0267333984375, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.297008547008547, |
| "grad_norm": 51.91085456898058, |
| "learning_rate": 6.207089261531013e-07, |
| "logits/chosen": -0.55859375, |
| "logits/rejected": -0.58984375, |
| "logps/chosen": -1.9609375, |
| "logps/rejected": -1.546875, |
| "loss": 1.0664, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.034912109375, |
| "rewards/margins": 0.037109375, |
| "rewards/rejected": -0.00213623046875, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.29914529914529914, |
| "grad_norm": 41.597766022453776, |
| "learning_rate": 6.19045924447377e-07, |
| "logits/chosen": -0.53125, |
| "logits/rejected": -0.4609375, |
| "logps/chosen": -1.2421875, |
| "logps/rejected": -1.921875, |
| "loss": 0.968, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.037353515625, |
| "rewards/margins": -0.00286865234375, |
| "rewards/rejected": -0.03466796875, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.30128205128205127, |
| "grad_norm": 46.81449342159252, |
| "learning_rate": 6.173679410809868e-07, |
| "logits/chosen": -0.6171875, |
| "logits/rejected": -0.484375, |
| "logps/chosen": -1.0546875, |
| "logps/rejected": -0.58984375, |
| "loss": 1.0081, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.0732421875, |
| "rewards/margins": 0.08203125, |
| "rewards/rejected": -0.00982666015625, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.3034188034188034, |
| "grad_norm": 67.6599719874742, |
| "learning_rate": 6.156750694914267e-07, |
| "logits/chosen": -0.220703125, |
| "logits/rejected": -0.27734375, |
| "logps/chosen": -0.70703125, |
| "logps/rejected": -0.7734375, |
| "loss": 1.103, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.022705078125, |
| "rewards/margins": -0.00018310546875, |
| "rewards/rejected": -0.0224609375, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.3055555555555556, |
| "grad_norm": 63.19976246483412, |
| "learning_rate": 6.139674039452337e-07, |
| "logits/chosen": -0.341796875, |
| "logits/rejected": -0.330078125, |
| "logps/chosen": -0.765625, |
| "logps/rejected": -0.88671875, |
| "loss": 1.0105, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00946044921875, |
| "rewards/margins": 0.0125732421875, |
| "rewards/rejected": -0.02197265625, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.3076923076923077, |
| "grad_norm": 45.56384975545583, |
| "learning_rate": 6.12245039532738e-07, |
| "logits/chosen": -0.439453125, |
| "logits/rejected": -0.455078125, |
| "logps/chosen": -0.515625, |
| "logps/rejected": -0.61328125, |
| "loss": 1.0502, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.0072021484375, |
| "rewards/margins": 0.008056640625, |
| "rewards/rejected": -0.0008544921875, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.30982905982905984, |
| "grad_norm": 10.977460072453223, |
| "learning_rate": 6.105080721627672e-07, |
| "logits/chosen": -0.66796875, |
| "logits/rejected": -0.6875, |
| "logps/chosen": -0.6953125, |
| "logps/rejected": -0.71875, |
| "loss": 0.9111, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.01806640625, |
| "rewards/margins": 0.046630859375, |
| "rewards/rejected": -0.064453125, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.31196581196581197, |
| "grad_norm": 36.093300799361764, |
| "learning_rate": 6.087565985573058e-07, |
| "logits/chosen": -0.427734375, |
| "logits/rejected": -0.375, |
| "logps/chosen": -1.28125, |
| "logps/rejected": -2.5625, |
| "loss": 0.9296, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.009765625, |
| "rewards/margins": 0.10986328125, |
| "rewards/rejected": -0.10009765625, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.3141025641025641, |
| "grad_norm": 24.555454331128033, |
| "learning_rate": 6.069907162461091e-07, |
| "logits/chosen": -0.384765625, |
| "logits/rejected": -0.3203125, |
| "logps/chosen": -0.88671875, |
| "logps/rejected": -1.703125, |
| "loss": 0.925, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.011474609375, |
| "rewards/margins": 0.01226806640625, |
| "rewards/rejected": -0.02392578125, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.3162393162393162, |
| "grad_norm": 17.962639768725776, |
| "learning_rate": 6.052105235612728e-07, |
| "logits/chosen": -0.5546875, |
| "logits/rejected": -0.482421875, |
| "logps/chosen": -0.6640625, |
| "logps/rejected": -0.76171875, |
| "loss": 0.9261, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.01116943359375, |
| "rewards/margins": 0.034912109375, |
| "rewards/rejected": -0.04638671875, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.31837606837606836, |
| "grad_norm": 88.52202221948862, |
| "learning_rate": 6.03416119631757e-07, |
| "logits/chosen": -0.62890625, |
| "logits/rejected": -0.55078125, |
| "logps/chosen": -0.671875, |
| "logps/rejected": -0.7578125, |
| "loss": 1.1044, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.0087890625, |
| "rewards/margins": 0.04638671875, |
| "rewards/rejected": -0.05517578125, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.32051282051282054, |
| "grad_norm": 72.8710811659705, |
| "learning_rate": 6.016076043778666e-07, |
| "logits/chosen": -0.56640625, |
| "logits/rejected": -0.515625, |
| "logps/chosen": -1.03125, |
| "logps/rejected": -0.8046875, |
| "loss": 1.0563, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.000885009765625, |
| "rewards/margins": 0.030029296875, |
| "rewards/rejected": -0.029052734375, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.32264957264957267, |
| "grad_norm": 41.317922231176695, |
| "learning_rate": 5.99785078505687e-07, |
| "logits/chosen": -0.4609375, |
| "logits/rejected": -0.484375, |
| "logps/chosen": -0.84375, |
| "logps/rejected": -0.89453125, |
| "loss": 0.9644, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.01446533203125, |
| "rewards/margins": 0.031494140625, |
| "rewards/rejected": -0.0458984375, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.3247863247863248, |
| "grad_norm": 91.94873476738564, |
| "learning_rate": 5.979486435014762e-07, |
| "logits/chosen": -0.74609375, |
| "logits/rejected": -0.66015625, |
| "logps/chosen": -0.6484375, |
| "logps/rejected": -0.76953125, |
| "loss": 1.0979, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.03857421875, |
| "rewards/margins": 0.024658203125, |
| "rewards/rejected": -0.0634765625, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.3269230769230769, |
| "grad_norm": 25.837369633853438, |
| "learning_rate": 5.960984016260143e-07, |
| "logits/chosen": -0.5390625, |
| "logits/rejected": -0.416015625, |
| "logps/chosen": -0.890625, |
| "logps/rejected": -1.453125, |
| "loss": 0.9852, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.019775390625, |
| "rewards/margins": 0.01904296875, |
| "rewards/rejected": -0.038818359375, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.32905982905982906, |
| "grad_norm": 39.18517654429065, |
| "learning_rate": 5.942344559089085e-07, |
| "logits/chosen": -0.34765625, |
| "logits/rejected": -0.34375, |
| "logps/chosen": -1.3671875, |
| "logps/rejected": -1.0, |
| "loss": 1.0094, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.040771484375, |
| "rewards/margins": -0.005279541015625, |
| "rewards/rejected": -0.03564453125, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3311965811965812, |
| "grad_norm": 35.50412006815021, |
| "learning_rate": 5.923569101428565e-07, |
| "logits/chosen": -0.341796875, |
| "logits/rejected": -0.275390625, |
| "logps/chosen": -1.0703125, |
| "logps/rejected": -1.09375, |
| "loss": 0.9484, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.0291748046875, |
| "rewards/margins": 0.040283203125, |
| "rewards/rejected": -0.0693359375, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 22.773657057689636, |
| "learning_rate": 5.904658688778659e-07, |
| "logits/chosen": -0.4921875, |
| "logits/rejected": -0.5546875, |
| "logps/chosen": -0.6640625, |
| "logps/rejected": -0.765625, |
| "loss": 0.9808, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0208740234375, |
| "rewards/margins": 0.0145263671875, |
| "rewards/rejected": -0.035400390625, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.33547008547008544, |
| "grad_norm": 52.9050755178356, |
| "learning_rate": 5.885614374154336e-07, |
| "logits/chosen": -0.396484375, |
| "logits/rejected": -0.30078125, |
| "logps/chosen": -0.4453125, |
| "logps/rejected": -0.45703125, |
| "loss": 0.9656, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.00335693359375, |
| "rewards/margins": 0.01165771484375, |
| "rewards/rejected": -0.0150146484375, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.33760683760683763, |
| "grad_norm": 86.68634256496178, |
| "learning_rate": 5.866437218026815e-07, |
| "logits/chosen": -0.54296875, |
| "logits/rejected": -0.4921875, |
| "logps/chosen": -0.5625, |
| "logps/rejected": -0.69921875, |
| "loss": 1.061, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.004150390625, |
| "rewards/margins": 0.0101318359375, |
| "rewards/rejected": -0.0142822265625, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.33974358974358976, |
| "grad_norm": 40.533651979323515, |
| "learning_rate": 5.847128288264513e-07, |
| "logits/chosen": -0.435546875, |
| "logits/rejected": -0.396484375, |
| "logps/chosen": -0.828125, |
| "logps/rejected": -0.9921875, |
| "loss": 0.9945, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.01055908203125, |
| "rewards/margins": 0.01904296875, |
| "rewards/rejected": -0.0296630859375, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.3418803418803419, |
| "grad_norm": 45.12294203204509, |
| "learning_rate": 5.827688660073584e-07, |
| "logits/chosen": -0.46875, |
| "logits/rejected": -0.46875, |
| "logps/chosen": -0.65234375, |
| "logps/rejected": -0.609375, |
| "loss": 1.024, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.002410888671875, |
| "rewards/margins": 0.005523681640625, |
| "rewards/rejected": -0.0079345703125, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.344017094017094, |
| "grad_norm": 26.553957066037817, |
| "learning_rate": 5.808119415938044e-07, |
| "logits/chosen": -0.64453125, |
| "logits/rejected": -0.5625, |
| "logps/chosen": -0.87890625, |
| "logps/rejected": -1.1484375, |
| "loss": 0.962, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.02978515625, |
| "rewards/margins": 0.0712890625, |
| "rewards/rejected": -0.10107421875, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.34615384615384615, |
| "grad_norm": 28.26727251383747, |
| "learning_rate": 5.788421645559498e-07, |
| "logits/chosen": -0.1748046875, |
| "logits/rejected": -0.2314453125, |
| "logps/chosen": -0.69140625, |
| "logps/rejected": -0.7109375, |
| "loss": 0.9536, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.011474609375, |
| "rewards/margins": 0.0181884765625, |
| "rewards/rejected": -0.0296630859375, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.3482905982905983, |
| "grad_norm": 34.150127616766795, |
| "learning_rate": 5.768596445796454e-07, |
| "logits/chosen": -0.28125, |
| "logits/rejected": -0.244140625, |
| "logps/chosen": -1.9609375, |
| "logps/rejected": -0.73046875, |
| "loss": 0.9164, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": 0.0654296875, |
| "rewards/margins": 0.1259765625, |
| "rewards/rejected": -0.060302734375, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.3504273504273504, |
| "grad_norm": 55.99080770473473, |
| "learning_rate": 5.748644920603248e-07, |
| "logits/chosen": -0.53515625, |
| "logits/rejected": -0.515625, |
| "logps/chosen": -0.93359375, |
| "logps/rejected": -1.5625, |
| "loss": 0.9708, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.0037841796875, |
| "rewards/margins": 0.06640625, |
| "rewards/rejected": -0.06298828125, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.3525641025641026, |
| "grad_norm": 88.95253575474342, |
| "learning_rate": 5.728568180968577e-07, |
| "logits/chosen": -0.734375, |
| "logits/rejected": -0.6953125, |
| "logps/chosen": -1.0078125, |
| "logps/rejected": -1.2421875, |
| "loss": 1.0817, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0113525390625, |
| "rewards/margins": 0.026611328125, |
| "rewards/rejected": -0.0380859375, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.3547008547008547, |
| "grad_norm": 42.9833424237718, |
| "learning_rate": 5.708367344853625e-07, |
| "logits/chosen": -0.27734375, |
| "logits/rejected": -0.21875, |
| "logps/chosen": -1.671875, |
| "logps/rejected": -1.640625, |
| "loss": 0.9656, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.01336669921875, |
| "rewards/margins": -0.01092529296875, |
| "rewards/rejected": -0.00244140625, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.35683760683760685, |
| "grad_norm": 28.891155958443296, |
| "learning_rate": 5.688043537129817e-07, |
| "logits/chosen": -0.3828125, |
| "logits/rejected": -0.44140625, |
| "logps/chosen": -1.65625, |
| "logps/rejected": -1.6796875, |
| "loss": 0.9359, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.0634765625, |
| "rewards/margins": 0.01953125, |
| "rewards/rejected": 0.04345703125, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.358974358974359, |
| "grad_norm": 41.10465613204891, |
| "learning_rate": 5.667597889516172e-07, |
| "logits/chosen": -0.369140625, |
| "logits/rejected": -0.20703125, |
| "logps/chosen": -1.6640625, |
| "logps/rejected": -1.1015625, |
| "loss": 1.0015, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0108642578125, |
| "rewards/margins": 0.06005859375, |
| "rewards/rejected": -0.07080078125, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.3611111111111111, |
| "grad_norm": 83.41369039535908, |
| "learning_rate": 5.647031540516297e-07, |
| "logits/chosen": -0.404296875, |
| "logits/rejected": -0.45703125, |
| "logps/chosen": -0.80859375, |
| "logps/rejected": -0.69921875, |
| "loss": 1.1058, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0162353515625, |
| "rewards/margins": 0.0155029296875, |
| "rewards/rejected": -0.03173828125, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.36324786324786323, |
| "grad_norm": 26.2199862579392, |
| "learning_rate": 5.626345635354979e-07, |
| "logits/chosen": -0.6015625, |
| "logits/rejected": -0.578125, |
| "logps/chosen": -0.87890625, |
| "logps/rejected": -1.125, |
| "loss": 0.9514, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00225830078125, |
| "rewards/margins": 0.008056640625, |
| "rewards/rejected": -0.01031494140625, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.36538461538461536, |
| "grad_norm": 205.59104124832635, |
| "learning_rate": 5.605541325914418e-07, |
| "logits/chosen": -0.462890625, |
| "logits/rejected": -0.462890625, |
| "logps/chosen": -0.6875, |
| "logps/rejected": -0.6015625, |
| "loss": 1.0734, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.005462646484375, |
| "rewards/margins": 0.0250244140625, |
| "rewards/rejected": -0.030517578125, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.36752136752136755, |
| "grad_norm": 38.5807267512022, |
| "learning_rate": 5.584619770670089e-07, |
| "logits/chosen": -0.2373046875, |
| "logits/rejected": -0.41015625, |
| "logps/chosen": -1.78125, |
| "logps/rejected": -1.703125, |
| "loss": 0.9011, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.022705078125, |
| "rewards/margins": -0.004119873046875, |
| "rewards/rejected": -0.0185546875, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.3696581196581197, |
| "grad_norm": 56.16265649435291, |
| "learning_rate": 5.563582134626227e-07, |
| "logits/chosen": -0.5546875, |
| "logits/rejected": -0.52734375, |
| "logps/chosen": -0.87109375, |
| "logps/rejected": -1.828125, |
| "loss": 0.9905, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.009765625, |
| "rewards/margins": 0.048828125, |
| "rewards/rejected": -0.038818359375, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3717948717948718, |
| "grad_norm": 33.662597431065436, |
| "learning_rate": 5.542429589250953e-07, |
| "logits/chosen": -0.46484375, |
| "logits/rejected": -0.4375, |
| "logps/chosen": -1.9375, |
| "logps/rejected": -2.125, |
| "loss": 0.9218, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.01318359375, |
| "rewards/margins": 0.041015625, |
| "rewards/rejected": -0.02783203125, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.37393162393162394, |
| "grad_norm": 39.14271257757384, |
| "learning_rate": 5.52116331241105e-07, |
| "logits/chosen": -0.46875, |
| "logits/rejected": -0.37890625, |
| "logps/chosen": -0.9609375, |
| "logps/rejected": -1.34375, |
| "loss": 0.9592, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.01220703125, |
| "rewards/margins": -0.03564453125, |
| "rewards/rejected": 0.0235595703125, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.37606837606837606, |
| "grad_norm": 19.69124876410317, |
| "learning_rate": 5.499784488306366e-07, |
| "logits/chosen": -0.330078125, |
| "logits/rejected": -0.265625, |
| "logps/chosen": -1.34375, |
| "logps/rejected": -0.73828125, |
| "loss": 0.9491, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0093994140625, |
| "rewards/margins": 0.047119140625, |
| "rewards/rejected": -0.056640625, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.3782051282051282, |
| "grad_norm": 19.592787918798177, |
| "learning_rate": 5.47829430740388e-07, |
| "logits/chosen": -0.47265625, |
| "logits/rejected": -0.41015625, |
| "logps/chosen": -0.8671875, |
| "logps/rejected": -1.375, |
| "loss": 0.9664, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.000213623046875, |
| "rewards/margins": 0.04931640625, |
| "rewards/rejected": -0.049072265625, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.3803418803418803, |
| "grad_norm": 41.26617381984967, |
| "learning_rate": 5.456693966371404e-07, |
| "logits/chosen": -0.4921875, |
| "logits/rejected": -0.5625, |
| "logps/chosen": -0.9375, |
| "logps/rejected": -1.3203125, |
| "loss": 0.9364, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0040283203125, |
| "rewards/margins": 0.04296875, |
| "rewards/rejected": -0.046875, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.38247863247863245, |
| "grad_norm": 47.31641000734864, |
| "learning_rate": 5.43498466801095e-07, |
| "logits/chosen": -0.392578125, |
| "logits/rejected": -0.337890625, |
| "logps/chosen": -0.6484375, |
| "logps/rejected": -1.0625, |
| "loss": 0.9885, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.0037841796875, |
| "rewards/margins": 0.055419921875, |
| "rewards/rejected": -0.0517578125, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.38461538461538464, |
| "grad_norm": 31.34178974992674, |
| "learning_rate": 5.413167621191755e-07, |
| "logits/chosen": -0.318359375, |
| "logits/rejected": -0.306640625, |
| "logps/chosen": -1.2421875, |
| "logps/rejected": -1.328125, |
| "loss": 0.9866, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.03173828125, |
| "rewards/margins": 0.0196533203125, |
| "rewards/rejected": -0.05126953125, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.38675213675213677, |
| "grad_norm": 38.69009356294788, |
| "learning_rate": 5.391244040782964e-07, |
| "logits/chosen": -0.71875, |
| "logits/rejected": -0.7578125, |
| "logps/chosen": -2.34375, |
| "logps/rejected": -2.4375, |
| "loss": 0.9636, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0634765625, |
| "rewards/margins": 0.0201416015625, |
| "rewards/rejected": -0.08349609375, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.3888888888888889, |
| "grad_norm": 66.88208286581656, |
| "learning_rate": 5.369215147585981e-07, |
| "logits/chosen": -0.76171875, |
| "logits/rejected": -0.609375, |
| "logps/chosen": -1.3125, |
| "logps/rejected": -0.84375, |
| "loss": 1.0153, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0220947265625, |
| "rewards/margins": -0.00927734375, |
| "rewards/rejected": -0.0128173828125, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.391025641025641, |
| "grad_norm": 19.918239619320428, |
| "learning_rate": 5.347082168266491e-07, |
| "logits/chosen": -0.44140625, |
| "logits/rejected": -0.466796875, |
| "logps/chosen": -0.6015625, |
| "logps/rejected": -1.640625, |
| "loss": 0.9355, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.003173828125, |
| "rewards/margins": 0.017333984375, |
| "rewards/rejected": -0.01409912109375, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.39316239316239315, |
| "grad_norm": 54.06880890834597, |
| "learning_rate": 5.324846335286148e-07, |
| "logits/chosen": -0.3984375, |
| "logits/rejected": -0.50390625, |
| "logps/chosen": -0.91015625, |
| "logps/rejected": -0.79296875, |
| "loss": 0.9766, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.04443359375, |
| "rewards/margins": 0.0167236328125, |
| "rewards/rejected": -0.0615234375, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.3952991452991453, |
| "grad_norm": 60.553261993821486, |
| "learning_rate": 5.302508886833953e-07, |
| "logits/chosen": -0.2314453125, |
| "logits/rejected": -0.19921875, |
| "logps/chosen": -0.7265625, |
| "logps/rejected": -0.78515625, |
| "loss": 0.9797, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.001129150390625, |
| "rewards/margins": 0.060546875, |
| "rewards/rejected": -0.0615234375, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.3974358974358974, |
| "grad_norm": 46.18394100489199, |
| "learning_rate": 5.280071066757304e-07, |
| "logits/chosen": -0.498046875, |
| "logits/rejected": -0.48046875, |
| "logps/chosen": -1.171875, |
| "logps/rejected": -1.359375, |
| "loss": 0.9438, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.01043701171875, |
| "rewards/margins": 0.01708984375, |
| "rewards/rejected": -0.0274658203125, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.3995726495726496, |
| "grad_norm": 26.559128159512724, |
| "learning_rate": 5.25753412449273e-07, |
| "logits/chosen": -0.337890625, |
| "logits/rejected": -0.271484375, |
| "logps/chosen": -0.69921875, |
| "logps/rejected": -0.546875, |
| "loss": 0.9596, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.01123046875, |
| "rewards/margins": 0.023193359375, |
| "rewards/rejected": -0.011962890625, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.4017094017094017, |
| "grad_norm": 77.25370785550327, |
| "learning_rate": 5.234899314996325e-07, |
| "logits/chosen": -0.4296875, |
| "logits/rejected": -0.369140625, |
| "logps/chosen": -0.671875, |
| "logps/rejected": -0.7734375, |
| "loss": 1.052, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.002410888671875, |
| "rewards/margins": 0.035400390625, |
| "rewards/rejected": -0.037841796875, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.40384615384615385, |
| "grad_norm": 79.5046519078694, |
| "learning_rate": 5.212167898673855e-07, |
| "logits/chosen": -0.515625, |
| "logits/rejected": -0.51171875, |
| "logps/chosen": -1.0078125, |
| "logps/rejected": -0.92578125, |
| "loss": 1.0046, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0224609375, |
| "rewards/margins": 0.030029296875, |
| "rewards/rejected": -0.052490234375, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.405982905982906, |
| "grad_norm": 61.583682807858615, |
| "learning_rate": 5.189341141310579e-07, |
| "logits/chosen": -0.6640625, |
| "logits/rejected": -0.5703125, |
| "logps/chosen": -1.6015625, |
| "logps/rejected": -1.53125, |
| "loss": 1.006, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.013916015625, |
| "rewards/margins": 0.00494384765625, |
| "rewards/rejected": 0.0089111328125, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.4081196581196581, |
| "grad_norm": 15.327322287722419, |
| "learning_rate": 5.166420314000771e-07, |
| "logits/chosen": -0.51953125, |
| "logits/rejected": -0.52734375, |
| "logps/chosen": -0.890625, |
| "logps/rejected": -0.72265625, |
| "loss": 0.9421, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.025390625, |
| "rewards/margins": 0.003662109375, |
| "rewards/rejected": -0.0289306640625, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.41025641025641024, |
| "grad_norm": 54.74166945437457, |
| "learning_rate": 5.143406693076928e-07, |
| "logits/chosen": -0.56640625, |
| "logits/rejected": -0.453125, |
| "logps/chosen": -0.96875, |
| "logps/rejected": -1.21875, |
| "loss": 0.9989, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.013916015625, |
| "rewards/margins": 0.0242919921875, |
| "rewards/rejected": -0.038330078125, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.41239316239316237, |
| "grad_norm": 62.61311550998391, |
| "learning_rate": 5.120301560038705e-07, |
| "logits/chosen": -0.384765625, |
| "logits/rejected": -0.38671875, |
| "logps/chosen": -0.73828125, |
| "logps/rejected": -0.71875, |
| "loss": 1.0123, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.001953125, |
| "rewards/margins": 0.0201416015625, |
| "rewards/rejected": -0.0220947265625, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.41452991452991456, |
| "grad_norm": 41.1464773305956, |
| "learning_rate": 5.097106201481553e-07, |
| "logits/chosen": -0.765625, |
| "logits/rejected": -0.69921875, |
| "logps/chosen": -1.0625, |
| "logps/rejected": -0.9140625, |
| "loss": 0.964, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.00640869140625, |
| "rewards/margins": 0.0130615234375, |
| "rewards/rejected": -0.006561279296875, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.4166666666666667, |
| "grad_norm": 18.43312573869149, |
| "learning_rate": 5.073821909025078e-07, |
| "logits/chosen": -0.77734375, |
| "logits/rejected": -0.71484375, |
| "logps/chosen": -0.6875, |
| "logps/rejected": -1.078125, |
| "loss": 0.9498, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.014404296875, |
| "rewards/margins": 0.050537109375, |
| "rewards/rejected": -0.06494140625, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.4188034188034188, |
| "grad_norm": 71.4495599627426, |
| "learning_rate": 5.050449979241119e-07, |
| "logits/chosen": -0.494140625, |
| "logits/rejected": -0.4921875, |
| "logps/chosen": -0.71875, |
| "logps/rejected": -1.2265625, |
| "loss": 1.0096, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.00286865234375, |
| "rewards/margins": 0.03662109375, |
| "rewards/rejected": -0.03955078125, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.42094017094017094, |
| "grad_norm": 19.446033419100402, |
| "learning_rate": 5.026991713581543e-07, |
| "logits/chosen": -0.27734375, |
| "logits/rejected": -0.33984375, |
| "logps/chosen": -0.6171875, |
| "logps/rejected": -0.72265625, |
| "loss": 0.9258, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.015625, |
| "rewards/margins": 0.03076171875, |
| "rewards/rejected": -0.046142578125, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.4230769230769231, |
| "grad_norm": 24.647809279268525, |
| "learning_rate": 5.003448418305781e-07, |
| "logits/chosen": -0.578125, |
| "logits/rejected": -0.546875, |
| "logps/chosen": -0.91796875, |
| "logps/rejected": -1.6875, |
| "loss": 0.8944, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.056396484375, |
| "rewards/margins": 0.0712890625, |
| "rewards/rejected": -0.1279296875, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.4252136752136752, |
| "grad_norm": 29.9361578160495, |
| "learning_rate": 4.979821404408084e-07, |
| "logits/chosen": -0.55078125, |
| "logits/rejected": -0.52734375, |
| "logps/chosen": -0.6328125, |
| "logps/rejected": -0.9140625, |
| "loss": 0.9269, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.0133056640625, |
| "rewards/margins": 0.0537109375, |
| "rewards/rejected": -0.06689453125, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.42735042735042733, |
| "grad_norm": 29.028111116310793, |
| "learning_rate": 4.956111987544529e-07, |
| "logits/chosen": -0.39453125, |
| "logits/rejected": -0.5, |
| "logps/chosen": -1.8515625, |
| "logps/rejected": -1.890625, |
| "loss": 0.9635, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.02392578125, |
| "rewards/margins": 0.05078125, |
| "rewards/rejected": -0.0751953125, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.42735042735042733, |
| "eval_logits/chosen": -0.56640625, |
| "eval_logits/rejected": -0.5625, |
| "eval_logps/chosen": -1.03125, |
| "eval_logps/rejected": -1.09375, |
| "eval_loss": 0.9727697968482971, |
| "eval_rewards/accuracies": 0.6774193644523621, |
| "eval_rewards/chosen": -0.0037994384765625, |
| "eval_rewards/margins": 0.046142578125, |
| "eval_rewards/rejected": -0.050048828125, |
| "eval_runtime": 104.9199, |
| "eval_samples_per_second": 18.69, |
| "eval_steps_per_second": 0.591, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.42948717948717946, |
| "grad_norm": 19.81681568724776, |
| "learning_rate": 4.932321487959748e-07, |
| "logits/chosen": -0.48046875, |
| "logits/rejected": -0.494140625, |
| "logps/chosen": -0.63671875, |
| "logps/rejected": -0.671875, |
| "loss": 0.9302, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.010498046875, |
| "rewards/margins": 0.0150146484375, |
| "rewards/rejected": -0.004486083984375, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.43162393162393164, |
| "grad_norm": 63.3680908382062, |
| "learning_rate": 4.908451230413419e-07, |
| "logits/chosen": -0.71484375, |
| "logits/rejected": -0.65625, |
| "logps/chosen": -0.59765625, |
| "logps/rejected": -0.57421875, |
| "loss": 1.0619, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.004058837890625, |
| "rewards/margins": 0.0101318359375, |
| "rewards/rejected": -0.006072998046875, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.4337606837606838, |
| "grad_norm": 27.94217013723975, |
| "learning_rate": 4.884502544106492e-07, |
| "logits/chosen": -0.46875, |
| "logits/rejected": -0.5625, |
| "logps/chosen": -0.796875, |
| "logps/rejected": -0.734375, |
| "loss": 0.9215, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.01318359375, |
| "rewards/margins": 0.005615234375, |
| "rewards/rejected": -0.018798828125, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.4358974358974359, |
| "grad_norm": 40.54930795227308, |
| "learning_rate": 4.860476762607174e-07, |
| "logits/chosen": -0.5078125, |
| "logits/rejected": -0.45703125, |
| "logps/chosen": -0.5, |
| "logps/rejected": -0.51171875, |
| "loss": 0.9994, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 0.003936767578125, |
| "rewards/margins": 0.014404296875, |
| "rewards/rejected": -0.01043701171875, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.43803418803418803, |
| "grad_norm": 39.57095333313773, |
| "learning_rate": 4.836375223776678e-07, |
| "logits/chosen": -0.52734375, |
| "logits/rejected": -0.53125, |
| "logps/chosen": -1.71875, |
| "logps/rejected": -1.6171875, |
| "loss": 0.9875, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.0859375, |
| "rewards/margins": 0.000701904296875, |
| "rewards/rejected": 0.0849609375, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.44017094017094016, |
| "grad_norm": 18.511716887685402, |
| "learning_rate": 4.812199269694711e-07, |
| "logits/chosen": -0.62109375, |
| "logits/rejected": -0.57421875, |
| "logps/chosen": -0.51953125, |
| "logps/rejected": -0.53515625, |
| "loss": 0.9124, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0010986328125, |
| "rewards/margins": 0.006103515625, |
| "rewards/rejected": -0.0072021484375, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.4423076923076923, |
| "grad_norm": 57.69428013444241, |
| "learning_rate": 4.787950246584753e-07, |
| "logits/chosen": -0.76171875, |
| "logits/rejected": -0.76171875, |
| "logps/chosen": -0.4921875, |
| "logps/rejected": -0.52734375, |
| "loss": 0.9723, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.00738525390625, |
| "rewards/margins": 0.01373291015625, |
| "rewards/rejected": -0.0211181640625, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 75.86709550039322, |
| "learning_rate": 4.7636295047390865e-07, |
| "logits/chosen": -0.515625, |
| "logits/rejected": -0.56640625, |
| "logps/chosen": -2.84375, |
| "logps/rejected": -2.0, |
| "loss": 1.0264, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 0.1962890625, |
| "rewards/margins": 0.177734375, |
| "rewards/rejected": 0.0189208984375, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.4465811965811966, |
| "grad_norm": 24.333855810540772, |
| "learning_rate": 4.7392383984436104e-07, |
| "logits/chosen": -0.38671875, |
| "logits/rejected": -0.357421875, |
| "logps/chosen": -0.765625, |
| "logps/rejected": -1.6875, |
| "loss": 0.8757, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.001495361328125, |
| "rewards/margins": 0.11474609375, |
| "rewards/rejected": -0.11328125, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.44871794871794873, |
| "grad_norm": 44.84857230941575, |
| "learning_rate": 4.7147782859024246e-07, |
| "logits/chosen": -0.7109375, |
| "logits/rejected": -0.65234375, |
| "logps/chosen": -1.5625, |
| "logps/rejected": -1.484375, |
| "loss": 1.0098, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.01019287109375, |
| "rewards/margins": 0.00518798828125, |
| "rewards/rejected": 0.00506591796875, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.45085470085470086, |
| "grad_norm": 50.30855337720552, |
| "learning_rate": 4.6902505291622014e-07, |
| "logits/chosen": -0.7265625, |
| "logits/rejected": -0.73828125, |
| "logps/chosen": -0.71484375, |
| "logps/rejected": -0.8046875, |
| "loss": 0.9634, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.02978515625, |
| "rewards/margins": 0.014404296875, |
| "rewards/rejected": -0.04443359375, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.452991452991453, |
| "grad_norm": 55.94011111026385, |
| "learning_rate": 4.66565649403634e-07, |
| "logits/chosen": -0.49609375, |
| "logits/rejected": -0.5, |
| "logps/chosen": -1.3125, |
| "logps/rejected": -1.78125, |
| "loss": 0.9763, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.00958251953125, |
| "rewards/margins": 0.0419921875, |
| "rewards/rejected": -0.032470703125, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.4551282051282051, |
| "grad_norm": 84.25142664538966, |
| "learning_rate": 4.6409975500289086e-07, |
| "logits/chosen": -0.484375, |
| "logits/rejected": -0.46875, |
| "logps/chosen": -0.66796875, |
| "logps/rejected": -0.703125, |
| "loss": 1.0564, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.00836181640625, |
| "rewards/margins": 0.0206298828125, |
| "rewards/rejected": -0.0289306640625, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.45726495726495725, |
| "grad_norm": 20.75895587246269, |
| "learning_rate": 4.6162750702583916e-07, |
| "logits/chosen": -0.40625, |
| "logits/rejected": -0.40625, |
| "logps/chosen": -0.62890625, |
| "logps/rejected": -0.63671875, |
| "loss": 0.9518, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.0025634765625, |
| "rewards/margins": 0.03271484375, |
| "rewards/rejected": -0.030029296875, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.4594017094017094, |
| "grad_norm": 41.23220742312626, |
| "learning_rate": 4.591490431381221e-07, |
| "logits/chosen": -0.466796875, |
| "logits/rejected": -0.50390625, |
| "logps/chosen": -0.609375, |
| "logps/rejected": -0.75390625, |
| "loss": 1.0527, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.016845703125, |
| "rewards/margins": 0.0010986328125, |
| "rewards/rejected": -0.01806640625, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.46153846153846156, |
| "grad_norm": 44.974264192003716, |
| "learning_rate": 4.5666450135151236e-07, |
| "logits/chosen": -0.77734375, |
| "logits/rejected": -0.8359375, |
| "logps/chosen": -0.73828125, |
| "logps/rejected": -1.375, |
| "loss": 0.9866, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0224609375, |
| "rewards/margins": 0.051025390625, |
| "rewards/rejected": -0.07373046875, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.4636752136752137, |
| "grad_norm": 29.27367889944009, |
| "learning_rate": 4.541740200162266e-07, |
| "logits/chosen": -0.5859375, |
| "logits/rejected": -0.58984375, |
| "logps/chosen": -2.125, |
| "logps/rejected": -1.359375, |
| "loss": 0.939, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.0771484375, |
| "rewards/margins": 0.036865234375, |
| "rewards/rejected": -0.11376953125, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.4658119658119658, |
| "grad_norm": 31.584259547296156, |
| "learning_rate": 4.5167773781322175e-07, |
| "logits/chosen": -0.6015625, |
| "logits/rejected": -0.431640625, |
| "logps/chosen": -2.109375, |
| "logps/rejected": -2.765625, |
| "loss": 0.9585, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0279541015625, |
| "rewards/margins": 0.08349609375, |
| "rewards/rejected": -0.111328125, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.46794871794871795, |
| "grad_norm": 42.48284320276936, |
| "learning_rate": 4.4917579374647265e-07, |
| "logits/chosen": -0.4609375, |
| "logits/rejected": -0.51953125, |
| "logps/chosen": -0.671875, |
| "logps/rejected": -0.68359375, |
| "loss": 0.9618, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0155029296875, |
| "rewards/margins": 0.0089111328125, |
| "rewards/rejected": -0.0244140625, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.4700854700854701, |
| "grad_norm": 18.23118783681457, |
| "learning_rate": 4.466683271352315e-07, |
| "logits/chosen": -0.49609375, |
| "logits/rejected": -0.345703125, |
| "logps/chosen": -0.734375, |
| "logps/rejected": -0.79296875, |
| "loss": 0.9417, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0174560546875, |
| "rewards/margins": 0.019287109375, |
| "rewards/rejected": -0.036865234375, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.4722222222222222, |
| "grad_norm": 26.2463503471584, |
| "learning_rate": 4.4415547760627006e-07, |
| "logits/chosen": -0.271484375, |
| "logits/rejected": -0.330078125, |
| "logps/chosen": -1.0078125, |
| "logps/rejected": -1.25, |
| "loss": 0.9237, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.03955078125, |
| "rewards/margins": 0.052001953125, |
| "rewards/rejected": -0.09130859375, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.47435897435897434, |
| "grad_norm": 34.08188430294409, |
| "learning_rate": 4.416373850861047e-07, |
| "logits/chosen": -0.66796875, |
| "logits/rejected": -0.5078125, |
| "logps/chosen": -1.7578125, |
| "logps/rejected": -1.3359375, |
| "loss": 1.0026, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0211181640625, |
| "rewards/margins": -0.0009765625, |
| "rewards/rejected": -0.0201416015625, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.47649572649572647, |
| "grad_norm": 21.569342340756666, |
| "learning_rate": 4.391141897932045e-07, |
| "logits/chosen": -0.640625, |
| "logits/rejected": -0.640625, |
| "logps/chosen": -0.56640625, |
| "logps/rejected": -0.486328125, |
| "loss": 0.9335, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.01531982421875, |
| "rewards/margins": 0.0142822265625, |
| "rewards/rejected": -0.029541015625, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.47863247863247865, |
| "grad_norm": 30.256607772494906, |
| "learning_rate": 4.3658603223018377e-07, |
| "logits/chosen": -0.40234375, |
| "logits/rejected": -0.40625, |
| "logps/chosen": -0.99609375, |
| "logps/rejected": -1.1640625, |
| "loss": 0.9175, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.0196533203125, |
| "rewards/margins": 0.08984375, |
| "rewards/rejected": -0.109375, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.4807692307692308, |
| "grad_norm": 102.05207305719217, |
| "learning_rate": 4.340530531759773e-07, |
| "logits/chosen": -0.625, |
| "logits/rejected": -0.65234375, |
| "logps/chosen": -0.7578125, |
| "logps/rejected": -1.4140625, |
| "loss": 1.035, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0263671875, |
| "rewards/margins": 0.012939453125, |
| "rewards/rejected": -0.039306640625, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.4829059829059829, |
| "grad_norm": 30.438856795785753, |
| "learning_rate": 4.3151539367800197e-07, |
| "logits/chosen": -0.75, |
| "logits/rejected": -0.6953125, |
| "logps/chosen": -1.4296875, |
| "logps/rejected": -1.234375, |
| "loss": 0.9395, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0167236328125, |
| "rewards/margins": -0.0120849609375, |
| "rewards/rejected": -0.00469970703125, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.48504273504273504, |
| "grad_norm": 64.65057839238729, |
| "learning_rate": 4.289731950443024e-07, |
| "logits/chosen": -0.55859375, |
| "logits/rejected": -0.51171875, |
| "logps/chosen": -0.7421875, |
| "logps/rejected": -0.6171875, |
| "loss": 1.0064, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.00738525390625, |
| "rewards/margins": 0.00927734375, |
| "rewards/rejected": -0.0166015625, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.48717948717948717, |
| "grad_norm": 22.221149752600365, |
| "learning_rate": 4.2642659883568226e-07, |
| "logits/chosen": -0.66015625, |
| "logits/rejected": -0.60546875, |
| "logps/chosen": -0.87109375, |
| "logps/rejected": -0.93359375, |
| "loss": 0.8986, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.07080078125, |
| "rewards/margins": 0.024658203125, |
| "rewards/rejected": -0.09521484375, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.4893162393162393, |
| "grad_norm": 34.72558335941602, |
| "learning_rate": 4.2387574685782143e-07, |
| "logits/chosen": -0.44921875, |
| "logits/rejected": -0.49609375, |
| "logps/chosen": -0.61328125, |
| "logps/rejected": -1.5859375, |
| "loss": 0.9515, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.00750732421875, |
| "rewards/margins": 0.05322265625, |
| "rewards/rejected": -0.060546875, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.49145299145299143, |
| "grad_norm": 40.26736969888996, |
| "learning_rate": 4.213207811533797e-07, |
| "logits/chosen": -0.453125, |
| "logits/rejected": -0.392578125, |
| "logps/chosen": -0.96875, |
| "logps/rejected": -1.6328125, |
| "loss": 0.9952, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.06640625, |
| "rewards/margins": 0.06396484375, |
| "rewards/rejected": -0.1298828125, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.4935897435897436, |
| "grad_norm": 26.92761732861108, |
| "learning_rate": 4.1876184399408744e-07, |
| "logits/chosen": -0.462890625, |
| "logits/rejected": -0.451171875, |
| "logps/chosen": -1.1328125, |
| "logps/rejected": -1.203125, |
| "loss": 0.9368, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.030029296875, |
| "rewards/margins": 0.04736328125, |
| "rewards/rejected": -0.07763671875, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.49572649572649574, |
| "grad_norm": 17.793046500243925, |
| "learning_rate": 4.161990778728231e-07, |
| "logits/chosen": -0.2392578125, |
| "logits/rejected": -0.197265625, |
| "logps/chosen": -0.7421875, |
| "logps/rejected": -1.09375, |
| "loss": 0.948, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.058837890625, |
| "rewards/margins": 0.0166015625, |
| "rewards/rejected": -0.07568359375, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.49786324786324787, |
| "grad_norm": 20.994788691900062, |
| "learning_rate": 4.136326254956784e-07, |
| "logits/chosen": -0.3671875, |
| "logits/rejected": -0.390625, |
| "logps/chosen": -1.3046875, |
| "logps/rejected": -0.8828125, |
| "loss": 0.9448, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00347900390625, |
| "rewards/margins": 0.003326416015625, |
| "rewards/rejected": -0.006866455078125, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 33.14361065612054, |
| "learning_rate": 4.110626297740122e-07, |
| "logits/chosen": -0.59375, |
| "logits/rejected": -0.68359375, |
| "logps/chosen": -1.1640625, |
| "logps/rejected": -0.88671875, |
| "loss": 0.9075, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.03857421875, |
| "rewards/margins": -0.00201416015625, |
| "rewards/rejected": -0.03662109375, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.5021367521367521, |
| "grad_norm": 23.710110812392536, |
| "learning_rate": 4.0848923381649195e-07, |
| "logits/chosen": -0.62109375, |
| "logits/rejected": -0.625, |
| "logps/chosen": -0.671875, |
| "logps/rejected": -0.921875, |
| "loss": 0.9398, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0179443359375, |
| "rewards/margins": 0.005706787109375, |
| "rewards/rejected": -0.023681640625, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.5042735042735043, |
| "grad_norm": 20.519140914016205, |
| "learning_rate": 4.059125809211259e-07, |
| "logits/chosen": -0.72265625, |
| "logits/rejected": -0.69140625, |
| "logps/chosen": -0.98046875, |
| "logps/rejected": -0.8515625, |
| "loss": 0.9618, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.022216796875, |
| "rewards/margins": 0.028076171875, |
| "rewards/rejected": -0.05029296875, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.5064102564102564, |
| "grad_norm": 55.432096028551676, |
| "learning_rate": 4.033328145672822e-07, |
| "logits/chosen": -0.39453125, |
| "logits/rejected": -0.51171875, |
| "logps/chosen": -1.3828125, |
| "logps/rejected": -1.640625, |
| "loss": 1.0188, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.00909423828125, |
| "rewards/margins": 0.0157470703125, |
| "rewards/rejected": -0.00665283203125, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.5085470085470085, |
| "grad_norm": 46.20803113821289, |
| "learning_rate": 4.007500784077006e-07, |
| "logits/chosen": -0.56640625, |
| "logits/rejected": -0.490234375, |
| "logps/chosen": -0.90625, |
| "logps/rejected": -0.8203125, |
| "loss": 0.9741, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.00634765625, |
| "rewards/margins": 0.0164794921875, |
| "rewards/rejected": -0.0228271484375, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.5106837606837606, |
| "grad_norm": 54.70162005395297, |
| "learning_rate": 3.9816451626049247e-07, |
| "logits/chosen": -0.50390625, |
| "logits/rejected": -0.490234375, |
| "logps/chosen": -1.1171875, |
| "logps/rejected": -0.8359375, |
| "loss": 0.9551, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0322265625, |
| "rewards/margins": 0.024658203125, |
| "rewards/rejected": -0.056884765625, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.5128205128205128, |
| "grad_norm": 21.189791976104605, |
| "learning_rate": 3.9557627210113264e-07, |
| "logits/chosen": -0.68359375, |
| "logits/rejected": -0.58203125, |
| "logps/chosen": -1.25, |
| "logps/rejected": -1.328125, |
| "loss": 0.9316, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.01458740234375, |
| "rewards/margins": 0.01361083984375, |
| "rewards/rejected": 0.0009765625, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5149572649572649, |
| "grad_norm": 30.828528043387475, |
| "learning_rate": 3.92985490054442e-07, |
| "logits/chosen": -0.53125, |
| "logits/rejected": -0.50390625, |
| "logps/chosen": -1.1640625, |
| "logps/rejected": -1.5, |
| "loss": 0.9681, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0245361328125, |
| "rewards/margins": 0.042724609375, |
| "rewards/rejected": -0.0673828125, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.5170940170940171, |
| "grad_norm": 41.70036042913, |
| "learning_rate": 3.903923143865625e-07, |
| "logits/chosen": -0.5703125, |
| "logits/rejected": -0.60546875, |
| "logps/chosen": -1.28125, |
| "logps/rejected": -2.078125, |
| "loss": 0.9426, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.033447265625, |
| "rewards/margins": 0.0458984375, |
| "rewards/rejected": -0.07958984375, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.5192307692307693, |
| "grad_norm": 73.93778223271919, |
| "learning_rate": 3.8779688949692316e-07, |
| "logits/chosen": -0.498046875, |
| "logits/rejected": -0.51171875, |
| "logps/chosen": -2.859375, |
| "logps/rejected": -1.8125, |
| "loss": 1.0366, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.02685546875, |
| "rewards/margins": 0.00848388671875, |
| "rewards/rejected": -0.035400390625, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.5213675213675214, |
| "grad_norm": 35.776829076433316, |
| "learning_rate": 3.851993599101998e-07, |
| "logits/chosen": -0.6484375, |
| "logits/rejected": -0.66796875, |
| "logps/chosen": -0.6875, |
| "logps/rejected": -0.64453125, |
| "loss": 0.9495, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.04052734375, |
| "rewards/margins": 0.0096435546875, |
| "rewards/rejected": -0.05029296875, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.5235042735042735, |
| "grad_norm": 27.238620302576226, |
| "learning_rate": 3.825998702682668e-07, |
| "logits/chosen": -0.7109375, |
| "logits/rejected": -0.65625, |
| "logps/chosen": -0.70703125, |
| "logps/rejected": -0.671875, |
| "loss": 0.9001, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.00372314453125, |
| "rewards/margins": 0.0289306640625, |
| "rewards/rejected": -0.025146484375, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.5256410256410257, |
| "grad_norm": 20.872195276246313, |
| "learning_rate": 3.799985653221433e-07, |
| "logits/chosen": -0.494140625, |
| "logits/rejected": -0.546875, |
| "logps/chosen": -0.734375, |
| "logps/rejected": -1.03125, |
| "loss": 0.9192, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.037109375, |
| "rewards/margins": 0.004852294921875, |
| "rewards/rejected": -0.0419921875, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.5277777777777778, |
| "grad_norm": 37.24895950193556, |
| "learning_rate": 3.773955899239325e-07, |
| "logits/chosen": -0.63671875, |
| "logits/rejected": -0.62109375, |
| "logps/chosen": -0.67578125, |
| "logps/rejected": -0.75, |
| "loss": 0.9956, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0145263671875, |
| "rewards/margins": 0.013427734375, |
| "rewards/rejected": -0.02783203125, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.5299145299145299, |
| "grad_norm": 25.848328585587293, |
| "learning_rate": 3.747910890187553e-07, |
| "logits/chosen": -0.3828125, |
| "logits/rejected": -0.38671875, |
| "logps/chosen": -0.70703125, |
| "logps/rejected": -0.6484375, |
| "loss": 0.9127, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 0.0157470703125, |
| "rewards/margins": 0.0322265625, |
| "rewards/rejected": -0.0166015625, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.532051282051282, |
| "grad_norm": 34.06646508786053, |
| "learning_rate": 3.7218520763667986e-07, |
| "logits/chosen": -0.734375, |
| "logits/rejected": -0.75390625, |
| "logps/chosen": -1.5703125, |
| "logps/rejected": -1.3515625, |
| "loss": 0.9003, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.01263427734375, |
| "rewards/margins": -0.0029296875, |
| "rewards/rejected": -0.00970458984375, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5341880341880342, |
| "grad_norm": 41.28995974148372, |
| "learning_rate": 3.695780908846459e-07, |
| "logits/chosen": -0.796875, |
| "logits/rejected": -0.765625, |
| "logps/chosen": -0.87109375, |
| "logps/rejected": -0.76953125, |
| "loss": 0.9581, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.004302978515625, |
| "rewards/margins": 0.013671875, |
| "rewards/rejected": -0.01806640625, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5363247863247863, |
| "grad_norm": 11.821036836394699, |
| "learning_rate": 3.669698839383829e-07, |
| "logits/chosen": -0.46484375, |
| "logits/rejected": -0.48046875, |
| "logps/chosen": -0.466796875, |
| "logps/rejected": -0.47265625, |
| "loss": 0.9258, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.0107421875, |
| "rewards/margins": 0.0260009765625, |
| "rewards/rejected": -0.036865234375, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.5384615384615384, |
| "grad_norm": 35.07428065878141, |
| "learning_rate": 3.6436073203432805e-07, |
| "logits/chosen": -0.416015625, |
| "logits/rejected": -0.396484375, |
| "logps/chosen": -0.9375, |
| "logps/rejected": -1.75, |
| "loss": 0.959, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.008544921875, |
| "rewards/margins": 0.0277099609375, |
| "rewards/rejected": -0.036376953125, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5405982905982906, |
| "grad_norm": 23.55218496365697, |
| "learning_rate": 3.6175078046153744e-07, |
| "logits/chosen": -0.578125, |
| "logits/rejected": -0.59765625, |
| "logps/chosen": -0.765625, |
| "logps/rejected": -1.59375, |
| "loss": 0.9366, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.045166015625, |
| "rewards/margins": -0.0059814453125, |
| "rewards/rejected": -0.0390625, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.5427350427350427, |
| "grad_norm": 20.862502117987717, |
| "learning_rate": 3.591401745535965e-07, |
| "logits/chosen": -0.63671875, |
| "logits/rejected": -0.6171875, |
| "logps/chosen": -0.49609375, |
| "logps/rejected": -0.5078125, |
| "loss": 0.921, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.021484375, |
| "rewards/margins": 0.014404296875, |
| "rewards/rejected": -0.035888671875, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5448717948717948, |
| "grad_norm": 29.660987179522632, |
| "learning_rate": 3.5652905968052676e-07, |
| "logits/chosen": -0.50390625, |
| "logits/rejected": -0.58203125, |
| "logps/chosen": -0.74609375, |
| "logps/rejected": -0.7578125, |
| "loss": 0.942, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.07080078125, |
| "rewards/margins": 0.01129150390625, |
| "rewards/rejected": -0.08203125, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.5470085470085471, |
| "grad_norm": 18.529268471979883, |
| "learning_rate": 3.5391758124069124e-07, |
| "logits/chosen": -0.40234375, |
| "logits/rejected": -0.48046875, |
| "logps/chosen": -1.09375, |
| "logps/rejected": -1.328125, |
| "loss": 0.9069, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.003662109375, |
| "rewards/margins": 0.0174560546875, |
| "rewards/rejected": -0.02099609375, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.5491452991452992, |
| "grad_norm": 109.6700121132754, |
| "learning_rate": 3.5130588465269785e-07, |
| "logits/chosen": -0.5078125, |
| "logits/rejected": -0.5234375, |
| "logps/chosen": -0.546875, |
| "logps/rejected": -0.51171875, |
| "loss": 1.048, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.021240234375, |
| "rewards/margins": 0.0185546875, |
| "rewards/rejected": -0.039794921875, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5512820512820513, |
| "grad_norm": 19.426233176236774, |
| "learning_rate": 3.486941153473021e-07, |
| "logits/chosen": -0.75, |
| "logits/rejected": -0.8046875, |
| "logps/chosen": -2.03125, |
| "logps/rejected": -2.453125, |
| "loss": 0.9047, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.1357421875, |
| "rewards/margins": 0.07421875, |
| "rewards/rejected": -0.2099609375, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5534188034188035, |
| "grad_norm": 46.82508695281614, |
| "learning_rate": 3.460824187593088e-07, |
| "logits/chosen": -0.52734375, |
| "logits/rejected": -0.474609375, |
| "logps/chosen": -1.328125, |
| "logps/rejected": -2.46875, |
| "loss": 1.02, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.029052734375, |
| "rewards/margins": -0.00341796875, |
| "rewards/rejected": -0.0257568359375, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 93.85201459491968, |
| "learning_rate": 3.4347094031947326e-07, |
| "logits/chosen": -0.609375, |
| "logits/rejected": -0.59375, |
| "logps/chosen": -0.796875, |
| "logps/rejected": -1.1328125, |
| "loss": 0.9739, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.05224609375, |
| "rewards/margins": 0.02880859375, |
| "rewards/rejected": -0.0810546875, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5576923076923077, |
| "grad_norm": 25.035527126644347, |
| "learning_rate": 3.408598254464035e-07, |
| "logits/chosen": -0.44921875, |
| "logits/rejected": -0.5078125, |
| "logps/chosen": -0.8359375, |
| "logps/rejected": -1.25, |
| "loss": 0.9421, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.029052734375, |
| "rewards/margins": -0.005859375, |
| "rewards/rejected": -0.023193359375, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5598290598290598, |
| "grad_norm": 25.292262701656554, |
| "learning_rate": 3.382492195384625e-07, |
| "logits/chosen": -0.392578125, |
| "logits/rejected": -0.392578125, |
| "logps/chosen": -0.828125, |
| "logps/rejected": -0.7421875, |
| "loss": 0.9186, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.024169921875, |
| "rewards/margins": 0.01904296875, |
| "rewards/rejected": -0.043212890625, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.561965811965812, |
| "grad_norm": 77.95609584102277, |
| "learning_rate": 3.3563926796567187e-07, |
| "logits/chosen": -0.625, |
| "logits/rejected": -0.6953125, |
| "logps/chosen": -0.76953125, |
| "logps/rejected": -0.8046875, |
| "loss": 1.082, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.021728515625, |
| "rewards/margins": 0.044189453125, |
| "rewards/rejected": -0.06591796875, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.5641025641025641, |
| "grad_norm": 31.432425326232472, |
| "learning_rate": 3.33030116061617e-07, |
| "logits/chosen": -0.64453125, |
| "logits/rejected": -0.6484375, |
| "logps/chosen": -1.5390625, |
| "logps/rejected": -1.2734375, |
| "loss": 0.8667, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.01904296875, |
| "rewards/margins": 0.06982421875, |
| "rewards/rejected": -0.0888671875, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5662393162393162, |
| "grad_norm": 47.53840395860865, |
| "learning_rate": 3.3042190911535425e-07, |
| "logits/chosen": -0.4609375, |
| "logits/rejected": -0.4453125, |
| "logps/chosen": -1.390625, |
| "logps/rejected": -2.359375, |
| "loss": 0.9517, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.00433349609375, |
| "rewards/margins": -0.0281982421875, |
| "rewards/rejected": 0.0322265625, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.5683760683760684, |
| "grad_norm": 41.666834938138656, |
| "learning_rate": 3.278147923633201e-07, |
| "logits/chosen": -0.1318359375, |
| "logits/rejected": -0.0517578125, |
| "logps/chosen": -0.828125, |
| "logps/rejected": -1.015625, |
| "loss": 0.9088, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.04833984375, |
| "rewards/margins": 0.06494140625, |
| "rewards/rejected": -0.11328125, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.5705128205128205, |
| "grad_norm": 25.14772301172709, |
| "learning_rate": 3.2520891098124484e-07, |
| "logits/chosen": -0.400390625, |
| "logits/rejected": -0.306640625, |
| "logps/chosen": -1.328125, |
| "logps/rejected": -1.609375, |
| "loss": 0.8958, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.05517578125, |
| "rewards/margins": 0.0751953125, |
| "rewards/rejected": -0.130859375, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.5726495726495726, |
| "grad_norm": 29.876794568739818, |
| "learning_rate": 3.2260441007606763e-07, |
| "logits/chosen": -0.1708984375, |
| "logits/rejected": -0.19140625, |
| "logps/chosen": -1.0234375, |
| "logps/rejected": -2.265625, |
| "loss": 0.8914, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0859375, |
| "rewards/margins": 0.056396484375, |
| "rewards/rejected": -0.142578125, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.5747863247863247, |
| "grad_norm": 28.482704912447183, |
| "learning_rate": 3.2000143467785667e-07, |
| "logits/chosen": -0.66796875, |
| "logits/rejected": -0.703125, |
| "logps/chosen": -2.0625, |
| "logps/rejected": -1.7890625, |
| "loss": 0.881, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.048583984375, |
| "rewards/margins": 0.1025390625, |
| "rewards/rejected": -0.05419921875, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.5769230769230769, |
| "grad_norm": 30.390512025141742, |
| "learning_rate": 3.174001297317332e-07, |
| "logits/chosen": -0.6484375, |
| "logits/rejected": -0.416015625, |
| "logps/chosen": -1.34375, |
| "logps/rejected": -1.3828125, |
| "loss": 0.968, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.051513671875, |
| "rewards/margins": 0.0169677734375, |
| "rewards/rejected": -0.068359375, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5790598290598291, |
| "grad_norm": 50.43026193511625, |
| "learning_rate": 3.1480064008980024e-07, |
| "logits/chosen": -0.515625, |
| "logits/rejected": -0.53515625, |
| "logps/chosen": -0.703125, |
| "logps/rejected": -1.7265625, |
| "loss": 0.9518, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.003082275390625, |
| "rewards/margins": 0.025146484375, |
| "rewards/rejected": -0.028076171875, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.5811965811965812, |
| "grad_norm": 31.084216885562007, |
| "learning_rate": 3.122031105030768e-07, |
| "logits/chosen": -0.55078125, |
| "logits/rejected": -0.44140625, |
| "logps/chosen": -0.953125, |
| "logps/rejected": -1.0078125, |
| "loss": 0.9025, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.00531005859375, |
| "rewards/margins": 0.029541015625, |
| "rewards/rejected": -0.0242919921875, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.5833333333333334, |
| "grad_norm": 21.587874916679418, |
| "learning_rate": 3.0960768561343756e-07, |
| "logits/chosen": -0.6640625, |
| "logits/rejected": -0.5703125, |
| "logps/chosen": -1.4453125, |
| "logps/rejected": -1.328125, |
| "loss": 0.9187, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.072265625, |
| "rewards/margins": 0.028076171875, |
| "rewards/rejected": -0.1005859375, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.5854700854700855, |
| "grad_norm": 35.6378668836564, |
| "learning_rate": 3.07014509945558e-07, |
| "logits/chosen": -0.59375, |
| "logits/rejected": -0.515625, |
| "logps/chosen": -0.921875, |
| "logps/rejected": -0.84375, |
| "loss": 0.9381, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.053955078125, |
| "rewards/margins": 0.033935546875, |
| "rewards/rejected": -0.087890625, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.5876068376068376, |
| "grad_norm": 73.21173382326778, |
| "learning_rate": 3.0442372789886744e-07, |
| "logits/chosen": -0.68359375, |
| "logits/rejected": -0.640625, |
| "logps/chosen": -0.84375, |
| "logps/rejected": -1.1796875, |
| "loss": 1.0093, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.00958251953125, |
| "rewards/margins": 0.060791015625, |
| "rewards/rejected": -0.0703125, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5897435897435898, |
| "grad_norm": 25.70974817197716, |
| "learning_rate": 3.0183548373950755e-07, |
| "logits/chosen": -0.6796875, |
| "logits/rejected": -0.6640625, |
| "logps/chosen": -2.203125, |
| "logps/rejected": -2.515625, |
| "loss": 0.9109, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.044189453125, |
| "rewards/margins": 0.1103515625, |
| "rewards/rejected": -0.154296875, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.5918803418803419, |
| "grad_norm": 46.216925593680635, |
| "learning_rate": 2.992499215922993e-07, |
| "logits/chosen": -0.53125, |
| "logits/rejected": -0.54296875, |
| "logps/chosen": -1.0859375, |
| "logps/rejected": -1.078125, |
| "loss": 0.9515, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.007568359375, |
| "rewards/margins": 0.044189453125, |
| "rewards/rejected": -0.0517578125, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.594017094017094, |
| "grad_norm": 37.597203962581744, |
| "learning_rate": 2.966671854327177e-07, |
| "logits/chosen": -0.58984375, |
| "logits/rejected": -0.458984375, |
| "logps/chosen": -0.7421875, |
| "logps/rejected": -0.82421875, |
| "loss": 0.9862, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.01470947265625, |
| "rewards/margins": 0.03125, |
| "rewards/rejected": -0.0458984375, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.5961538461538461, |
| "grad_norm": 27.881961513246008, |
| "learning_rate": 2.9408741907887424e-07, |
| "logits/chosen": -0.54296875, |
| "logits/rejected": -0.52734375, |
| "logps/chosen": -0.90625, |
| "logps/rejected": -0.74609375, |
| "loss": 0.8914, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0211181640625, |
| "rewards/margins": 0.02392578125, |
| "rewards/rejected": -0.045166015625, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.5982905982905983, |
| "grad_norm": 43.73080236556192, |
| "learning_rate": 2.91510766183508e-07, |
| "logits/chosen": -0.7890625, |
| "logits/rejected": -0.7734375, |
| "logps/chosen": -1.0625, |
| "logps/rejected": -1.03125, |
| "loss": 0.9796, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.029296875, |
| "rewards/margins": 0.047607421875, |
| "rewards/rejected": -0.07666015625, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6004273504273504, |
| "grad_norm": 46.70933914401127, |
| "learning_rate": 2.889373702259879e-07, |
| "logits/chosen": -0.8046875, |
| "logits/rejected": -0.77734375, |
| "logps/chosen": -0.63671875, |
| "logps/rejected": -0.62890625, |
| "loss": 0.9463, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.020263671875, |
| "rewards/margins": 0.019775390625, |
| "rewards/rejected": -0.0400390625, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.6025641025641025, |
| "grad_norm": 30.57420853354831, |
| "learning_rate": 2.863673745043216e-07, |
| "logits/chosen": -0.9140625, |
| "logits/rejected": -0.79296875, |
| "logps/chosen": -1.6796875, |
| "logps/rejected": -1.078125, |
| "loss": 0.9337, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.027099609375, |
| "rewards/margins": 0.05419921875, |
| "rewards/rejected": -0.08154296875, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.6047008547008547, |
| "grad_norm": 43.53013754695041, |
| "learning_rate": 2.838009221271769e-07, |
| "logits/chosen": -0.388671875, |
| "logits/rejected": -0.37109375, |
| "logps/chosen": -0.9296875, |
| "logps/rejected": -0.84765625, |
| "loss": 0.9728, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.04296875, |
| "rewards/margins": 0.0150146484375, |
| "rewards/rejected": -0.057861328125, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.6068376068376068, |
| "grad_norm": 19.88438908623489, |
| "learning_rate": 2.812381560059126e-07, |
| "logits/chosen": -0.4296875, |
| "logits/rejected": -0.48828125, |
| "logps/chosen": -0.8984375, |
| "logps/rejected": -0.953125, |
| "loss": 0.8746, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0390625, |
| "rewards/margins": 0.03564453125, |
| "rewards/rejected": -0.07470703125, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.6089743589743589, |
| "grad_norm": 23.40336854183335, |
| "learning_rate": 2.786792188466203e-07, |
| "logits/chosen": -0.5625, |
| "logits/rejected": -0.478515625, |
| "logps/chosen": -1.0703125, |
| "logps/rejected": -1.1640625, |
| "loss": 0.9618, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.04736328125, |
| "rewards/margins": -0.001007080078125, |
| "rewards/rejected": -0.046142578125, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.6111111111111112, |
| "grad_norm": 72.08451890329711, |
| "learning_rate": 2.7612425314217865e-07, |
| "logits/chosen": -0.14453125, |
| "logits/rejected": -0.12451171875, |
| "logps/chosen": -1.0546875, |
| "logps/rejected": -1.109375, |
| "loss": 0.9712, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.027099609375, |
| "rewards/margins": 0.0859375, |
| "rewards/rejected": -0.11328125, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.6132478632478633, |
| "grad_norm": 29.875118737426025, |
| "learning_rate": 2.7357340116431776e-07, |
| "logits/chosen": -0.302734375, |
| "logits/rejected": -0.19140625, |
| "logps/chosen": -0.59375, |
| "logps/rejected": -0.671875, |
| "loss": 0.837, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.0032196044921875, |
| "rewards/margins": 0.037841796875, |
| "rewards/rejected": -0.041015625, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 32.72926634266307, |
| "learning_rate": 2.7102680495569755e-07, |
| "logits/chosen": -0.423828125, |
| "logits/rejected": -0.5078125, |
| "logps/chosen": -0.8359375, |
| "logps/rejected": -1.25, |
| "loss": 0.9363, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.04541015625, |
| "rewards/margins": 0.0869140625, |
| "rewards/rejected": -0.1318359375, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.6175213675213675, |
| "grad_norm": 72.79449105495546, |
| "learning_rate": 2.6848460632199805e-07, |
| "logits/chosen": -0.55859375, |
| "logits/rejected": -0.546875, |
| "logps/chosen": -1.390625, |
| "logps/rejected": -2.28125, |
| "loss": 1.0001, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0673828125, |
| "rewards/margins": -0.03173828125, |
| "rewards/rejected": -0.035400390625, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.6196581196581197, |
| "grad_norm": 205.0964309554123, |
| "learning_rate": 2.6594694682402267e-07, |
| "logits/chosen": -0.5, |
| "logits/rejected": -0.50390625, |
| "logps/chosen": -0.640625, |
| "logps/rejected": -0.63671875, |
| "loss": 1.5129, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.03369140625, |
| "rewards/margins": 0.01531982421875, |
| "rewards/rejected": -0.048828125, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6217948717948718, |
| "grad_norm": 19.915515429772448, |
| "learning_rate": 2.6341396776981614e-07, |
| "logits/chosen": -0.59765625, |
| "logits/rejected": -0.5078125, |
| "logps/chosen": -1.78125, |
| "logps/rejected": -1.484375, |
| "loss": 0.9001, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0201416015625, |
| "rewards/margins": 0.0291748046875, |
| "rewards/rejected": -0.049072265625, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.6239316239316239, |
| "grad_norm": 28.296895506212497, |
| "learning_rate": 2.6088581020679536e-07, |
| "logits/chosen": -0.1181640625, |
| "logits/rejected": -0.1533203125, |
| "logps/chosen": -0.8515625, |
| "logps/rejected": -0.91796875, |
| "loss": 0.9131, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.02294921875, |
| "rewards/margins": 0.048583984375, |
| "rewards/rejected": -0.0712890625, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.6260683760683761, |
| "grad_norm": 33.11228677517524, |
| "learning_rate": 2.583626149138954e-07, |
| "logits/chosen": -0.6328125, |
| "logits/rejected": -0.55859375, |
| "logps/chosen": -1.359375, |
| "logps/rejected": -0.9453125, |
| "loss": 0.8931, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.02978515625, |
| "rewards/margins": 0.054443359375, |
| "rewards/rejected": -0.08447265625, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.6282051282051282, |
| "grad_norm": 29.244535425237153, |
| "learning_rate": 2.5584452239373e-07, |
| "logits/chosen": -0.671875, |
| "logits/rejected": -0.71484375, |
| "logps/chosen": -1.59375, |
| "logps/rejected": -2.953125, |
| "loss": 0.9468, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.0145263671875, |
| "rewards/margins": 0.060302734375, |
| "rewards/rejected": -0.07470703125, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.6303418803418803, |
| "grad_norm": 24.469679488213703, |
| "learning_rate": 2.5333167286476864e-07, |
| "logits/chosen": -0.50390625, |
| "logits/rejected": -0.609375, |
| "logps/chosen": -0.734375, |
| "logps/rejected": -1.4453125, |
| "loss": 0.9009, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.03564453125, |
| "rewards/margins": 0.029541015625, |
| "rewards/rejected": -0.06494140625, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.6324786324786325, |
| "grad_norm": 43.066148630199685, |
| "learning_rate": 2.5082420625352737e-07, |
| "logits/chosen": -0.390625, |
| "logits/rejected": -0.44140625, |
| "logps/chosen": -1.8984375, |
| "logps/rejected": -2.078125, |
| "loss": 0.9454, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.057861328125, |
| "rewards/margins": 0.08642578125, |
| "rewards/rejected": -0.14453125, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6346153846153846, |
| "grad_norm": 80.27061560705891, |
| "learning_rate": 2.4832226218677827e-07, |
| "logits/chosen": -0.58984375, |
| "logits/rejected": -0.51953125, |
| "logps/chosen": -2.125, |
| "logps/rejected": -2.21875, |
| "loss": 0.9612, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.1201171875, |
| "rewards/margins": 0.0277099609375, |
| "rewards/rejected": -0.1474609375, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.6367521367521367, |
| "grad_norm": 47.26237956889532, |
| "learning_rate": 2.458259799837735e-07, |
| "logits/chosen": -0.341796875, |
| "logits/rejected": -0.41015625, |
| "logps/chosen": -2.015625, |
| "logps/rejected": -1.9765625, |
| "loss": 0.9471, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.06298828125, |
| "rewards/margins": 0.03466796875, |
| "rewards/rejected": -0.09765625, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6388888888888888, |
| "grad_norm": 28.351687048174814, |
| "learning_rate": 2.4333549864848766e-07, |
| "logits/chosen": -0.51171875, |
| "logits/rejected": -0.44921875, |
| "logps/chosen": -0.71875, |
| "logps/rejected": -0.8515625, |
| "loss": 0.9233, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.03662109375, |
| "rewards/margins": 0.033447265625, |
| "rewards/rejected": -0.06982421875, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6410256410256411, |
| "grad_norm": 23.496220784871824, |
| "learning_rate": 2.408509568618779e-07, |
| "logits/chosen": -0.70703125, |
| "logits/rejected": -0.671875, |
| "logps/chosen": -1.140625, |
| "logps/rejected": -1.3359375, |
| "loss": 0.8739, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.083984375, |
| "rewards/margins": 0.00604248046875, |
| "rewards/rejected": -0.09033203125, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6410256410256411, |
| "eval_logits/chosen": -0.58203125, |
| "eval_logits/rejected": -0.59375, |
| "eval_logps/chosen": -1.1171875, |
| "eval_logps/rejected": -1.1953125, |
| "eval_loss": 0.9189149141311646, |
| "eval_rewards/accuracies": 0.7096773982048035, |
| "eval_rewards/chosen": -0.0439453125, |
| "eval_rewards/margins": 0.05810546875, |
| "eval_rewards/rejected": -0.10205078125, |
| "eval_runtime": 102.2621, |
| "eval_samples_per_second": 19.176, |
| "eval_steps_per_second": 0.606, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6431623931623932, |
| "grad_norm": 47.08783752673241, |
| "learning_rate": 2.3837249297416086e-07, |
| "logits/chosen": -0.671875, |
| "logits/rejected": -0.703125, |
| "logps/chosen": -0.6328125, |
| "logps/rejected": -0.69140625, |
| "loss": 0.944, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.01507568359375, |
| "rewards/margins": 0.027099609375, |
| "rewards/rejected": -0.042236328125, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.6452991452991453, |
| "grad_norm": 61.449734896636464, |
| "learning_rate": 2.3590024499710916e-07, |
| "logits/chosen": -0.671875, |
| "logits/rejected": -0.75390625, |
| "logps/chosen": -0.95703125, |
| "logps/rejected": -1.0546875, |
| "loss": 0.9629, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.08935546875, |
| "rewards/margins": 0.00823974609375, |
| "rewards/rejected": -0.09765625, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6474358974358975, |
| "grad_norm": 52.862250928964876, |
| "learning_rate": 2.3343435059636606e-07, |
| "logits/chosen": -0.7578125, |
| "logits/rejected": -0.70703125, |
| "logps/chosen": -2.171875, |
| "logps/rejected": -1.171875, |
| "loss": 1.0004, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.201171875, |
| "rewards/margins": -0.0400390625, |
| "rewards/rejected": -0.162109375, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.6495726495726496, |
| "grad_norm": 28.629721734286697, |
| "learning_rate": 2.3097494708377977e-07, |
| "logits/chosen": -0.5859375, |
| "logits/rejected": -0.58203125, |
| "logps/chosen": -1.078125, |
| "logps/rejected": -1.2265625, |
| "loss": 0.8655, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0400390625, |
| "rewards/margins": 0.0380859375, |
| "rewards/rejected": -0.078125, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.6517094017094017, |
| "grad_norm": 33.418704353017965, |
| "learning_rate": 2.285221714097575e-07, |
| "logits/chosen": -0.42578125, |
| "logits/rejected": -0.4375, |
| "logps/chosen": -1.96875, |
| "logps/rejected": -1.4375, |
| "loss": 0.8831, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.0079345703125, |
| "rewards/margins": 0.10791015625, |
| "rewards/rejected": -0.1162109375, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6538461538461539, |
| "grad_norm": 129.10426681364987, |
| "learning_rate": 2.2607616015563896e-07, |
| "logits/chosen": -0.48046875, |
| "logits/rejected": -0.46484375, |
| "logps/chosen": -1.25, |
| "logps/rejected": -0.91015625, |
| "loss": 1.0697, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0830078125, |
| "rewards/margins": -0.0052490234375, |
| "rewards/rejected": -0.078125, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.655982905982906, |
| "grad_norm": 36.45007975348447, |
| "learning_rate": 2.2363704952609142e-07, |
| "logits/chosen": -0.5078125, |
| "logits/rejected": -0.42578125, |
| "logps/chosen": -2.3125, |
| "logps/rejected": -1.390625, |
| "loss": 0.9263, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.046630859375, |
| "rewards/margins": 0.0126953125, |
| "rewards/rejected": -0.0595703125, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.6581196581196581, |
| "grad_norm": 80.4627542411564, |
| "learning_rate": 2.2120497534152476e-07, |
| "logits/chosen": -0.3125, |
| "logits/rejected": -0.32421875, |
| "logps/chosen": -1.6015625, |
| "logps/rejected": -1.8125, |
| "loss": 0.9742, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.03271484375, |
| "rewards/margins": 0.0322265625, |
| "rewards/rejected": 0.0006103515625, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.6602564102564102, |
| "grad_norm": 19.48595987582158, |
| "learning_rate": 2.1878007303052892e-07, |
| "logits/chosen": -0.640625, |
| "logits/rejected": -0.69140625, |
| "logps/chosen": -0.9375, |
| "logps/rejected": -1.484375, |
| "loss": 0.8988, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.00213623046875, |
| "rewards/margins": 0.06787109375, |
| "rewards/rejected": -0.0703125, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.6623931623931624, |
| "grad_norm": 38.239536478841806, |
| "learning_rate": 2.1636247762233223e-07, |
| "logits/chosen": -0.765625, |
| "logits/rejected": -0.7890625, |
| "logps/chosen": -0.8515625, |
| "logps/rejected": -0.9453125, |
| "loss": 0.9138, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0634765625, |
| "rewards/margins": -0.0001220703125, |
| "rewards/rejected": -0.0634765625, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6645299145299145, |
| "grad_norm": 22.654040122814195, |
| "learning_rate": 2.1395232373928256e-07, |
| "logits/chosen": -0.6484375, |
| "logits/rejected": -0.54296875, |
| "logps/chosen": -0.8984375, |
| "logps/rejected": -0.88671875, |
| "loss": 0.9166, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.05322265625, |
| "rewards/margins": 0.01904296875, |
| "rewards/rejected": -0.072265625, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 22.814581881293506, |
| "learning_rate": 2.1154974558935087e-07, |
| "logits/chosen": -0.5078125, |
| "logits/rejected": -0.53125, |
| "logps/chosen": -1.34375, |
| "logps/rejected": -1.7734375, |
| "loss": 0.8981, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.0419921875, |
| "rewards/margins": 0.042724609375, |
| "rewards/rejected": -0.00128173828125, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.6688034188034188, |
| "grad_norm": 27.638929228638546, |
| "learning_rate": 2.091548769586581e-07, |
| "logits/chosen": -0.6171875, |
| "logits/rejected": -0.57421875, |
| "logps/chosen": -0.828125, |
| "logps/rejected": -0.703125, |
| "loss": 0.9036, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.039306640625, |
| "rewards/margins": 0.02685546875, |
| "rewards/rejected": -0.06640625, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.6709401709401709, |
| "grad_norm": 25.374046730125965, |
| "learning_rate": 2.0676785120402512e-07, |
| "logits/chosen": -0.5078125, |
| "logits/rejected": -0.494140625, |
| "logps/chosen": -0.58203125, |
| "logps/rejected": -0.54296875, |
| "loss": 0.943, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.055419921875, |
| "rewards/margins": 0.01275634765625, |
| "rewards/rejected": -0.068359375, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.6730769230769231, |
| "grad_norm": 51.9089203355178, |
| "learning_rate": 2.043888012455471e-07, |
| "logits/chosen": -0.6484375, |
| "logits/rejected": -0.65625, |
| "logps/chosen": -1.921875, |
| "logps/rejected": -0.796875, |
| "loss": 0.9812, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.099609375, |
| "rewards/margins": -0.044677734375, |
| "rewards/rejected": -0.05517578125, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.6752136752136753, |
| "grad_norm": 29.258971220803748, |
| "learning_rate": 2.0201785955919153e-07, |
| "logits/chosen": -0.57421875, |
| "logits/rejected": -0.51953125, |
| "logps/chosen": -0.6640625, |
| "logps/rejected": -0.6640625, |
| "loss": 0.8953, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.05224609375, |
| "rewards/margins": 0.011474609375, |
| "rewards/rejected": -0.06396484375, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.6773504273504274, |
| "grad_norm": 47.087186810023475, |
| "learning_rate": 1.9965515816942188e-07, |
| "logits/chosen": 0.0111083984375, |
| "logits/rejected": -0.103515625, |
| "logps/chosen": -0.6875, |
| "logps/rejected": -0.6875, |
| "loss": 0.9901, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.020263671875, |
| "rewards/margins": 0.04296875, |
| "rewards/rejected": -0.06298828125, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.6794871794871795, |
| "grad_norm": 28.150562895031253, |
| "learning_rate": 1.9730082864184569e-07, |
| "logits/chosen": -0.7109375, |
| "logits/rejected": -0.75390625, |
| "logps/chosen": -0.921875, |
| "logps/rejected": -0.8671875, |
| "loss": 0.8884, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00335693359375, |
| "rewards/margins": 0.052734375, |
| "rewards/rejected": -0.055908203125, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.6816239316239316, |
| "grad_norm": 37.95822554361127, |
| "learning_rate": 1.9495500207588803e-07, |
| "logits/chosen": -0.51953125, |
| "logits/rejected": -0.466796875, |
| "logps/chosen": -0.734375, |
| "logps/rejected": -0.8203125, |
| "loss": 0.9323, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0458984375, |
| "rewards/margins": 0.039794921875, |
| "rewards/rejected": -0.08544921875, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.6837606837606838, |
| "grad_norm": 17.346530667365943, |
| "learning_rate": 1.9261780909749204e-07, |
| "logits/chosen": -0.515625, |
| "logits/rejected": -0.61328125, |
| "logps/chosen": -0.73828125, |
| "logps/rejected": -0.7578125, |
| "loss": 0.8615, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.03759765625, |
| "rewards/margins": 0.05859375, |
| "rewards/rejected": -0.095703125, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6858974358974359, |
| "grad_norm": 26.013086502242853, |
| "learning_rate": 1.9028937985184488e-07, |
| "logits/chosen": -0.271484375, |
| "logits/rejected": -0.44140625, |
| "logps/chosen": -1.1875, |
| "logps/rejected": -1.15625, |
| "loss": 0.8679, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.05908203125, |
| "rewards/margins": 0.037841796875, |
| "rewards/rejected": -0.09716796875, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.688034188034188, |
| "grad_norm": 23.10477919916546, |
| "learning_rate": 1.8796984399612961e-07, |
| "logits/chosen": -0.65234375, |
| "logits/rejected": -0.62890625, |
| "logps/chosen": -0.6484375, |
| "logps/rejected": -0.73046875, |
| "loss": 0.9015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.041015625, |
| "rewards/margins": 0.045166015625, |
| "rewards/rejected": -0.0859375, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.6901709401709402, |
| "grad_norm": 40.346721902061745, |
| "learning_rate": 1.8565933069230723e-07, |
| "logits/chosen": -0.455078125, |
| "logits/rejected": -0.44921875, |
| "logps/chosen": -0.72265625, |
| "logps/rejected": -1.3203125, |
| "loss": 0.8928, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.078125, |
| "rewards/margins": 0.07373046875, |
| "rewards/rejected": -0.15234375, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.6923076923076923, |
| "grad_norm": 17.96847921628251, |
| "learning_rate": 1.8335796859992293e-07, |
| "logits/chosen": -0.470703125, |
| "logits/rejected": -0.4140625, |
| "logps/chosen": -1.0546875, |
| "logps/rejected": -0.99609375, |
| "loss": 0.8604, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.080078125, |
| "rewards/margins": 0.0283203125, |
| "rewards/rejected": -0.10791015625, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.6944444444444444, |
| "grad_norm": 28.95665753195518, |
| "learning_rate": 1.8106588586894203e-07, |
| "logits/chosen": -0.7421875, |
| "logits/rejected": -0.76171875, |
| "logps/chosen": -0.79296875, |
| "logps/rejected": -0.8828125, |
| "loss": 0.899, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.050048828125, |
| "rewards/margins": 0.0361328125, |
| "rewards/rejected": -0.0859375, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.6965811965811965, |
| "grad_norm": 42.415256817420016, |
| "learning_rate": 1.7878321013261467e-07, |
| "logits/chosen": -0.59375, |
| "logits/rejected": -0.6171875, |
| "logps/chosen": -0.96875, |
| "logps/rejected": -1.15625, |
| "loss": 0.9105, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.05224609375, |
| "rewards/margins": 0.04052734375, |
| "rewards/rejected": -0.0927734375, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.6987179487179487, |
| "grad_norm": 24.916024571168084, |
| "learning_rate": 1.765100685003675e-07, |
| "logits/chosen": -0.263671875, |
| "logits/rejected": -0.37109375, |
| "logps/chosen": -1.765625, |
| "logps/rejected": -0.9296875, |
| "loss": 0.8912, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.039306640625, |
| "rewards/margins": 0.042236328125, |
| "rewards/rejected": -0.08154296875, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.7008547008547008, |
| "grad_norm": 38.23555158814467, |
| "learning_rate": 1.7424658755072684e-07, |
| "logits/chosen": -0.69140625, |
| "logits/rejected": -0.6171875, |
| "logps/chosen": -1.7421875, |
| "logps/rejected": -0.83984375, |
| "loss": 0.9182, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.02490234375, |
| "rewards/margins": 0.059326171875, |
| "rewards/rejected": -0.083984375, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.7029914529914529, |
| "grad_norm": 71.67585045413601, |
| "learning_rate": 1.7199289332426963e-07, |
| "logits/chosen": -0.60546875, |
| "logits/rejected": -0.6171875, |
| "logps/chosen": -1.0546875, |
| "logps/rejected": -0.69140625, |
| "loss": 0.9418, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.11328125, |
| "rewards/margins": -0.042724609375, |
| "rewards/rejected": -0.0703125, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.7051282051282052, |
| "grad_norm": 31.48093318308997, |
| "learning_rate": 1.697491113166047e-07, |
| "logits/chosen": -0.7421875, |
| "logits/rejected": -0.7265625, |
| "logps/chosen": -0.765625, |
| "logps/rejected": -1.2421875, |
| "loss": 0.9808, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0279541015625, |
| "rewards/margins": 0.080078125, |
| "rewards/rejected": -0.10791015625, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7072649572649573, |
| "grad_norm": 36.822454161010484, |
| "learning_rate": 1.6751536647138525e-07, |
| "logits/chosen": -0.5078125, |
| "logits/rejected": -0.498046875, |
| "logps/chosen": -1.21875, |
| "logps/rejected": -1.3359375, |
| "loss": 0.8826, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.06298828125, |
| "rewards/margins": 0.0595703125, |
| "rewards/rejected": -0.12255859375, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.7094017094017094, |
| "grad_norm": 21.571313736776336, |
| "learning_rate": 1.652917831733509e-07, |
| "logits/chosen": -0.5625, |
| "logits/rejected": -0.53125, |
| "logps/chosen": -0.7578125, |
| "logps/rejected": -0.90234375, |
| "loss": 0.9144, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.044189453125, |
| "rewards/margins": 0.0361328125, |
| "rewards/rejected": -0.08056640625, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.7115384615384616, |
| "grad_norm": 32.84234185812603, |
| "learning_rate": 1.6307848524140175e-07, |
| "logits/chosen": -0.5390625, |
| "logits/rejected": -0.494140625, |
| "logps/chosen": -1.015625, |
| "logps/rejected": -1.0390625, |
| "loss": 0.9741, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.0830078125, |
| "rewards/margins": 0.0166015625, |
| "rewards/rejected": -0.099609375, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.7136752136752137, |
| "grad_norm": 29.13502053907057, |
| "learning_rate": 1.6087559592170356e-07, |
| "logits/chosen": -0.498046875, |
| "logits/rejected": -0.515625, |
| "logps/chosen": -1.25, |
| "logps/rejected": -1.3203125, |
| "loss": 0.8452, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.1015625, |
| "rewards/margins": 0.06640625, |
| "rewards/rejected": -0.16796875, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.7158119658119658, |
| "grad_norm": 37.18731356468967, |
| "learning_rate": 1.5868323788082462e-07, |
| "logits/chosen": -0.52734375, |
| "logits/rejected": -0.546875, |
| "logps/chosen": -0.86328125, |
| "logps/rejected": -0.71875, |
| "loss": 0.9375, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.046875, |
| "rewards/margins": 0.01953125, |
| "rewards/rejected": -0.06640625, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.717948717948718, |
| "grad_norm": 43.083953890628244, |
| "learning_rate": 1.5650153319890508e-07, |
| "logits/chosen": -0.63671875, |
| "logits/rejected": -0.546875, |
| "logps/chosen": -0.86328125, |
| "logps/rejected": -1.3984375, |
| "loss": 0.8783, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.06591796875, |
| "rewards/margins": 0.07080078125, |
| "rewards/rejected": -0.13671875, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.7200854700854701, |
| "grad_norm": 39.50465162692857, |
| "learning_rate": 1.543306033628597e-07, |
| "logits/chosen": -0.5859375, |
| "logits/rejected": -0.546875, |
| "logps/chosen": -0.8828125, |
| "logps/rejected": -1.2265625, |
| "loss": 0.9142, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.062255859375, |
| "rewards/margins": 0.07861328125, |
| "rewards/rejected": -0.140625, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.7222222222222222, |
| "grad_norm": 45.92938655648082, |
| "learning_rate": 1.5217056925961196e-07, |
| "logits/chosen": -0.58203125, |
| "logits/rejected": -0.59375, |
| "logps/chosen": -1.4765625, |
| "logps/rejected": -0.7734375, |
| "loss": 0.9757, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0306396484375, |
| "rewards/margins": 0.0390625, |
| "rewards/rejected": -0.06982421875, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.7243589743589743, |
| "grad_norm": 31.162218682245538, |
| "learning_rate": 1.5002155116936342e-07, |
| "logits/chosen": -0.6640625, |
| "logits/rejected": -0.71875, |
| "logps/chosen": -0.9140625, |
| "logps/rejected": -1.03125, |
| "loss": 0.9158, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.072265625, |
| "rewards/margins": 0.0693359375, |
| "rewards/rejected": -0.1416015625, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.7264957264957265, |
| "grad_norm": 53.49760741911357, |
| "learning_rate": 1.4788366875889506e-07, |
| "logits/chosen": -0.5703125, |
| "logits/rejected": -0.52734375, |
| "logps/chosen": -0.953125, |
| "logps/rejected": -0.8671875, |
| "loss": 0.944, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.08203125, |
| "rewards/margins": -0.0052490234375, |
| "rewards/rejected": -0.07666015625, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7286324786324786, |
| "grad_norm": 26.13264228294242, |
| "learning_rate": 1.4575704107490483e-07, |
| "logits/chosen": -0.640625, |
| "logits/rejected": -0.63671875, |
| "logps/chosen": -2.171875, |
| "logps/rejected": -1.96875, |
| "loss": 0.8617, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.060302734375, |
| "rewards/margins": 0.0751953125, |
| "rewards/rejected": -0.1357421875, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.7307692307692307, |
| "grad_norm": 42.99292575995844, |
| "learning_rate": 1.4364178653737737e-07, |
| "logits/chosen": -0.6015625, |
| "logits/rejected": -0.59375, |
| "logps/chosen": -1.796875, |
| "logps/rejected": -0.8515625, |
| "loss": 0.9573, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.10400390625, |
| "rewards/margins": -0.0225830078125, |
| "rewards/rejected": -0.08154296875, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.7329059829059829, |
| "grad_norm": 51.89913814066636, |
| "learning_rate": 1.4153802293299097e-07, |
| "logits/chosen": -0.3125, |
| "logits/rejected": -0.345703125, |
| "logps/chosen": -1.4609375, |
| "logps/rejected": -2.234375, |
| "loss": 0.8804, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.03173828125, |
| "rewards/margins": 0.07958984375, |
| "rewards/rejected": -0.111328125, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.7350427350427351, |
| "grad_norm": 65.90958403692584, |
| "learning_rate": 1.3944586740855812e-07, |
| "logits/chosen": -0.68359375, |
| "logits/rejected": -0.66796875, |
| "logps/chosen": -0.98828125, |
| "logps/rejected": -2.015625, |
| "loss": 0.9264, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.083984375, |
| "rewards/margins": 0.0908203125, |
| "rewards/rejected": -0.1748046875, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.7371794871794872, |
| "grad_norm": 27.869444404639868, |
| "learning_rate": 1.373654364645021e-07, |
| "logits/chosen": -0.65234375, |
| "logits/rejected": -0.703125, |
| "logps/chosen": -0.84765625, |
| "logps/rejected": -2.0625, |
| "loss": 0.8766, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.037353515625, |
| "rewards/margins": 0.08935546875, |
| "rewards/rejected": -0.126953125, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.7393162393162394, |
| "grad_norm": 28.689619149528802, |
| "learning_rate": 1.3529684594837035e-07, |
| "logits/chosen": -0.6328125, |
| "logits/rejected": -0.62109375, |
| "logps/chosen": -1.2421875, |
| "logps/rejected": -1.7890625, |
| "loss": 0.8604, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0517578125, |
| "rewards/margins": 0.0400390625, |
| "rewards/rejected": -0.09228515625, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.7414529914529915, |
| "grad_norm": 32.71728165861424, |
| "learning_rate": 1.3324021104838275e-07, |
| "logits/chosen": -0.56640625, |
| "logits/rejected": -0.58984375, |
| "logps/chosen": -1.8828125, |
| "logps/rejected": -3.171875, |
| "loss": 0.8964, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.049560546875, |
| "rewards/margins": 0.00396728515625, |
| "rewards/rejected": -0.053466796875, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.7435897435897436, |
| "grad_norm": 34.24589371209163, |
| "learning_rate": 1.3119564628701822e-07, |
| "logits/chosen": -0.337890625, |
| "logits/rejected": -0.41015625, |
| "logps/chosen": -1.34375, |
| "logps/rejected": -1.6875, |
| "loss": 0.8834, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.023193359375, |
| "rewards/margins": 0.0859375, |
| "rewards/rejected": -0.10888671875, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.7457264957264957, |
| "grad_norm": 13.972227869423218, |
| "learning_rate": 1.2916326551463748e-07, |
| "logits/chosen": -0.8359375, |
| "logits/rejected": -0.8046875, |
| "logps/chosen": -0.83984375, |
| "logps/rejected": -0.765625, |
| "loss": 0.8831, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.05224609375, |
| "rewards/margins": 0.042236328125, |
| "rewards/rejected": -0.09423828125, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.7478632478632479, |
| "grad_norm": 39.05059309519189, |
| "learning_rate": 1.2714318190314227e-07, |
| "logits/chosen": -0.57421875, |
| "logits/rejected": -0.7109375, |
| "logps/chosen": -0.8828125, |
| "logps/rejected": -1.5234375, |
| "loss": 0.9318, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.049072265625, |
| "rewards/margins": -0.0107421875, |
| "rewards/rejected": -0.0380859375, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 30.774060476068694, |
| "learning_rate": 1.251355079396752e-07, |
| "logits/chosen": -0.625, |
| "logits/rejected": -0.65625, |
| "logps/chosen": -1.8828125, |
| "logps/rejected": -0.8515625, |
| "loss": 0.8772, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.0673828125, |
| "rewards/margins": 0.05517578125, |
| "rewards/rejected": -0.1220703125, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.7521367521367521, |
| "grad_norm": 27.8606967937537, |
| "learning_rate": 1.2314035542035478e-07, |
| "logits/chosen": -0.65234375, |
| "logits/rejected": -0.546875, |
| "logps/chosen": -1.265625, |
| "logps/rejected": -0.85546875, |
| "loss": 0.8914, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.08837890625, |
| "rewards/margins": -0.00115966796875, |
| "rewards/rejected": -0.0869140625, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.7542735042735043, |
| "grad_norm": 23.469754541397073, |
| "learning_rate": 1.2115783544405026e-07, |
| "logits/chosen": -0.45703125, |
| "logits/rejected": -0.38671875, |
| "logps/chosen": -0.8984375, |
| "logps/rejected": -1.65625, |
| "loss": 0.8776, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.03564453125, |
| "rewards/margins": 0.1083984375, |
| "rewards/rejected": -0.1435546875, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.7564102564102564, |
| "grad_norm": 61.11898109311696, |
| "learning_rate": 1.1918805840619561e-07, |
| "logits/chosen": -0.412109375, |
| "logits/rejected": -0.54296875, |
| "logps/chosen": -0.6875, |
| "logps/rejected": -2.21875, |
| "loss": 0.983, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0537109375, |
| "rewards/margins": 0.11328125, |
| "rewards/rejected": -0.1669921875, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.7585470085470085, |
| "grad_norm": 34.686715289245434, |
| "learning_rate": 1.1723113399264162e-07, |
| "logits/chosen": -0.53125, |
| "logits/rejected": -0.45703125, |
| "logps/chosen": -0.8046875, |
| "logps/rejected": -0.90625, |
| "loss": 0.8938, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.0595703125, |
| "rewards/margins": 0.078125, |
| "rewards/rejected": -0.1376953125, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.7606837606837606, |
| "grad_norm": 28.581977784197978, |
| "learning_rate": 1.1528717117354865e-07, |
| "logits/chosen": -0.57421875, |
| "logits/rejected": -0.625, |
| "logps/chosen": -1.5078125, |
| "logps/rejected": -2.46875, |
| "loss": 0.8634, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.0537109375, |
| "rewards/margins": 0.0556640625, |
| "rewards/rejected": -0.109375, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.7628205128205128, |
| "grad_norm": 35.61439045102826, |
| "learning_rate": 1.1335627819731852e-07, |
| "logits/chosen": -0.4375, |
| "logits/rejected": -0.51953125, |
| "logps/chosen": -0.97265625, |
| "logps/rejected": -1.0234375, |
| "loss": 0.8699, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.10693359375, |
| "rewards/margins": 0.0096435546875, |
| "rewards/rejected": -0.11669921875, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.7649572649572649, |
| "grad_norm": 94.57763593516974, |
| "learning_rate": 1.114385625845664e-07, |
| "logits/chosen": -0.578125, |
| "logits/rejected": -0.58984375, |
| "logps/chosen": -0.76171875, |
| "logps/rejected": -0.921875, |
| "loss": 1.0251, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0771484375, |
| "rewards/margins": 0.0712890625, |
| "rewards/rejected": -0.1484375, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.7670940170940171, |
| "grad_norm": 41.86614792413909, |
| "learning_rate": 1.0953413112213418e-07, |
| "logits/chosen": -0.296875, |
| "logits/rejected": -0.259765625, |
| "logps/chosen": -0.890625, |
| "logps/rejected": -1.71875, |
| "loss": 0.9625, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0458984375, |
| "rewards/margins": -0.00421142578125, |
| "rewards/rejected": -0.041748046875, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 65.35514474408973, |
| "learning_rate": 1.0764308985714354e-07, |
| "logits/chosen": -0.52734375, |
| "logits/rejected": -0.5078125, |
| "logps/chosen": -1.0390625, |
| "logps/rejected": -0.69140625, |
| "loss": 1.0248, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.017333984375, |
| "rewards/margins": 0.072265625, |
| "rewards/rejected": -0.08935546875, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.7713675213675214, |
| "grad_norm": 21.30177814317387, |
| "learning_rate": 1.0576554409109134e-07, |
| "logits/chosen": -0.51171875, |
| "logits/rejected": -0.5078125, |
| "logps/chosen": -0.796875, |
| "logps/rejected": -0.71875, |
| "loss": 0.9224, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.047119140625, |
| "rewards/margins": 0.017333984375, |
| "rewards/rejected": -0.064453125, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.7735042735042735, |
| "grad_norm": 20.90088638719811, |
| "learning_rate": 1.039015983739857e-07, |
| "logits/chosen": -0.47265625, |
| "logits/rejected": -0.46875, |
| "logps/chosen": -0.75, |
| "logps/rejected": -1.4609375, |
| "loss": 0.9038, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.06591796875, |
| "rewards/margins": 0.054443359375, |
| "rewards/rejected": -0.12060546875, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.7756410256410257, |
| "grad_norm": 23.552172762978927, |
| "learning_rate": 1.0205135649852387e-07, |
| "logits/chosen": -0.37109375, |
| "logits/rejected": -0.5234375, |
| "logps/chosen": -0.90234375, |
| "logps/rejected": -1.28125, |
| "loss": 0.8752, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.04638671875, |
| "rewards/margins": 0.0576171875, |
| "rewards/rejected": -0.103515625, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 43.97955125787066, |
| "learning_rate": 1.0021492149431308e-07, |
| "logits/chosen": -0.55078125, |
| "logits/rejected": -0.62890625, |
| "logps/chosen": -1.2890625, |
| "logps/rejected": -1.65625, |
| "loss": 0.8794, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.016357421875, |
| "rewards/margins": 0.1025390625, |
| "rewards/rejected": -0.11865234375, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.7799145299145299, |
| "grad_norm": 17.611224627975545, |
| "learning_rate": 9.839239562213343e-08, |
| "logits/chosen": -0.26953125, |
| "logits/rejected": -0.322265625, |
| "logps/chosen": -0.9765625, |
| "logps/rejected": -1.171875, |
| "loss": 0.8906, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.044677734375, |
| "rewards/margins": 0.04443359375, |
| "rewards/rejected": -0.0888671875, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.782051282051282, |
| "grad_norm": 34.066165086726, |
| "learning_rate": 9.658388036824293e-08, |
| "logits/chosen": -0.60546875, |
| "logits/rejected": -0.671875, |
| "logps/chosen": -0.70703125, |
| "logps/rejected": -1.3203125, |
| "loss": 0.8866, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.050048828125, |
| "rewards/margins": 0.052734375, |
| "rewards/rejected": -0.10302734375, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.7841880341880342, |
| "grad_norm": 21.579122943736486, |
| "learning_rate": 9.478947643872724e-08, |
| "logits/chosen": -0.5625, |
| "logits/rejected": -0.5625, |
| "logps/chosen": -1.8203125, |
| "logps/rejected": -1.4921875, |
| "loss": 0.8434, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0888671875, |
| "rewards/margins": 0.007568359375, |
| "rewards/rejected": -0.0966796875, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.7863247863247863, |
| "grad_norm": 27.05462584053435, |
| "learning_rate": 9.300928375389093e-08, |
| "logits/chosen": -0.50390625, |
| "logits/rejected": -0.44921875, |
| "logps/chosen": -1.953125, |
| "logps/rejected": -1.0390625, |
| "loss": 0.8708, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.09375, |
| "rewards/margins": 0.01104736328125, |
| "rewards/rejected": -0.10498046875, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.7884615384615384, |
| "grad_norm": 29.745532951341836, |
| "learning_rate": 9.124340144269419e-08, |
| "logits/chosen": -0.55859375, |
| "logits/rejected": -0.56640625, |
| "logps/chosen": -1.078125, |
| "logps/rejected": -1.4453125, |
| "loss": 0.8735, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.05908203125, |
| "rewards/margins": 0.0732421875, |
| "rewards/rejected": -0.1328125, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.7905982905982906, |
| "grad_norm": 36.32061374606974, |
| "learning_rate": 8.949192783723277e-08, |
| "logits/chosen": -0.796875, |
| "logits/rejected": -0.7265625, |
| "logps/chosen": -0.7265625, |
| "logps/rejected": -0.7734375, |
| "loss": 0.9109, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0732421875, |
| "rewards/margins": 0.02685546875, |
| "rewards/rejected": -0.10009765625, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.7927350427350427, |
| "grad_norm": 35.858803281032266, |
| "learning_rate": 8.775496046726193e-08, |
| "logits/chosen": -0.5703125, |
| "logits/rejected": -0.58203125, |
| "logps/chosen": -1.125, |
| "logps/rejected": -1.0234375, |
| "loss": 0.8995, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.06005859375, |
| "rewards/margins": 0.0146484375, |
| "rewards/rejected": -0.07470703125, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.7948717948717948, |
| "grad_norm": 101.49292897549103, |
| "learning_rate": 8.603259605476635e-08, |
| "logits/chosen": -0.66015625, |
| "logits/rejected": -0.640625, |
| "logps/chosen": -0.9453125, |
| "logps/rejected": -1.2265625, |
| "loss": 0.9249, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.056640625, |
| "rewards/margins": 0.003662109375, |
| "rewards/rejected": -0.06005859375, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.7970085470085471, |
| "grad_norm": 18.221625405676257, |
| "learning_rate": 8.43249305085733e-08, |
| "logits/chosen": -0.640625, |
| "logits/rejected": -0.61328125, |
| "logps/chosen": -0.796875, |
| "logps/rejected": -0.76953125, |
| "loss": 0.853, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.04736328125, |
| "rewards/margins": 0.055908203125, |
| "rewards/rejected": -0.103515625, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.7991452991452992, |
| "grad_norm": 27.286873106336177, |
| "learning_rate": 8.263205891901301e-08, |
| "logits/chosen": -0.55078125, |
| "logits/rejected": -0.58984375, |
| "logps/chosen": -1.0703125, |
| "logps/rejected": -1.515625, |
| "loss": 0.8521, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.033935546875, |
| "rewards/margins": 0.0888671875, |
| "rewards/rejected": -0.12255859375, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.8012820512820513, |
| "grad_norm": 28.665743004887997, |
| "learning_rate": 8.095407555262294e-08, |
| "logits/chosen": -0.4375, |
| "logits/rejected": -0.416015625, |
| "logps/chosen": -1.0390625, |
| "logps/rejected": -1.21875, |
| "loss": 0.885, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.05322265625, |
| "rewards/margins": 0.040771484375, |
| "rewards/rejected": -0.09423828125, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.8034188034188035, |
| "grad_norm": 34.61683957680185, |
| "learning_rate": 7.929107384689855e-08, |
| "logits/chosen": -0.64453125, |
| "logits/rejected": -0.640625, |
| "logps/chosen": -0.81640625, |
| "logps/rejected": -0.82421875, |
| "loss": 0.8433, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.056884765625, |
| "rewards/margins": 0.0213623046875, |
| "rewards/rejected": -0.078125, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.8055555555555556, |
| "grad_norm": 27.203509280102132, |
| "learning_rate": 7.764314640509094e-08, |
| "logits/chosen": -0.8203125, |
| "logits/rejected": -0.7890625, |
| "logps/chosen": -2.46875, |
| "logps/rejected": -1.703125, |
| "loss": 0.932, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0361328125, |
| "rewards/margins": 0.041748046875, |
| "rewards/rejected": -0.07763671875, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.8076923076923077, |
| "grad_norm": 69.76164123890895, |
| "learning_rate": 7.601038499104956e-08, |
| "logits/chosen": -0.5234375, |
| "logits/rejected": -0.494140625, |
| "logps/chosen": -1.609375, |
| "logps/rejected": -1.890625, |
| "loss": 0.9048, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.023681640625, |
| "rewards/margins": 0.06396484375, |
| "rewards/rejected": -0.08740234375, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.8098290598290598, |
| "grad_norm": 16.95114226496351, |
| "learning_rate": 7.439288052411272e-08, |
| "logits/chosen": -0.53125, |
| "logits/rejected": -0.6171875, |
| "logps/chosen": -1.0078125, |
| "logps/rejected": -0.859375, |
| "loss": 0.8966, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.022216796875, |
| "rewards/margins": 0.059326171875, |
| "rewards/rejected": -0.08154296875, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.811965811965812, |
| "grad_norm": 22.739756048836032, |
| "learning_rate": 7.279072307404507e-08, |
| "logits/chosen": -0.63671875, |
| "logits/rejected": -0.6484375, |
| "logps/chosen": -0.90625, |
| "logps/rejected": -1.7265625, |
| "loss": 0.8435, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.06494140625, |
| "rewards/margins": 0.040771484375, |
| "rewards/rejected": -0.10595703125, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8141025641025641, |
| "grad_norm": 26.571323472554763, |
| "learning_rate": 7.120400185602155e-08, |
| "logits/chosen": -0.267578125, |
| "logits/rejected": -0.220703125, |
| "logps/chosen": -0.7890625, |
| "logps/rejected": -1.359375, |
| "loss": 0.9124, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.062255859375, |
| "rewards/margins": 0.00299072265625, |
| "rewards/rejected": -0.0654296875, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.8162393162393162, |
| "grad_norm": 36.79009357491132, |
| "learning_rate": 6.963280522565996e-08, |
| "logits/chosen": -0.421875, |
| "logits/rejected": -0.376953125, |
| "logps/chosen": -1.1640625, |
| "logps/rejected": -1.3046875, |
| "loss": 0.863, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0703125, |
| "rewards/margins": 0.05419921875, |
| "rewards/rejected": -0.12451171875, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.8183760683760684, |
| "grad_norm": 29.76450789194752, |
| "learning_rate": 6.807722067410082e-08, |
| "logits/chosen": -0.1787109375, |
| "logits/rejected": -0.27734375, |
| "logps/chosen": -0.7265625, |
| "logps/rejected": -0.78125, |
| "loss": 0.9323, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.0361328125, |
| "rewards/margins": 0.057373046875, |
| "rewards/rejected": -0.09375, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.8205128205128205, |
| "grad_norm": 35.13396843518851, |
| "learning_rate": 6.653733482313519e-08, |
| "logits/chosen": -0.44140625, |
| "logits/rejected": -0.421875, |
| "logps/chosen": -0.703125, |
| "logps/rejected": -1.3984375, |
| "loss": 0.8912, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0556640625, |
| "rewards/margins": 0.095703125, |
| "rewards/rejected": -0.15234375, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.8226495726495726, |
| "grad_norm": 53.4634091517358, |
| "learning_rate": 6.501323342038164e-08, |
| "logits/chosen": -0.466796875, |
| "logits/rejected": -0.51171875, |
| "logps/chosen": -0.84765625, |
| "logps/rejected": -1.09375, |
| "loss": 0.9086, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.06640625, |
| "rewards/margins": 0.0771484375, |
| "rewards/rejected": -0.1435546875, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.8247863247863247, |
| "grad_norm": 60.84615207783601, |
| "learning_rate": 6.350500133451102e-08, |
| "logits/chosen": -0.53125, |
| "logits/rejected": -0.671875, |
| "logps/chosen": -1.953125, |
| "logps/rejected": -1.5703125, |
| "loss": 0.8564, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.033447265625, |
| "rewards/margins": 0.13671875, |
| "rewards/rejected": -0.169921875, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.8269230769230769, |
| "grad_norm": 17.914476410603207, |
| "learning_rate": 6.201272255052099e-08, |
| "logits/chosen": -0.0986328125, |
| "logits/rejected": -0.10888671875, |
| "logps/chosen": -0.73828125, |
| "logps/rejected": -0.83984375, |
| "loss": 0.8545, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.03173828125, |
| "rewards/margins": 0.055419921875, |
| "rewards/rejected": -0.08740234375, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.8290598290598291, |
| "grad_norm": 21.49907230153767, |
| "learning_rate": 6.05364801650592e-08, |
| "logits/chosen": -0.74609375, |
| "logits/rejected": -0.6875, |
| "logps/chosen": -0.76171875, |
| "logps/rejected": -1.28125, |
| "loss": 0.8809, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.07666015625, |
| "rewards/margins": 0.020751953125, |
| "rewards/rejected": -0.09716796875, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.8311965811965812, |
| "grad_norm": 61.47517825652869, |
| "learning_rate": 5.907635638179577e-08, |
| "logits/chosen": -0.66796875, |
| "logits/rejected": -0.6328125, |
| "logps/chosen": -0.71875, |
| "logps/rejected": -0.75, |
| "loss": 0.9121, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.07666015625, |
| "rewards/margins": 0.039794921875, |
| "rewards/rejected": -0.1162109375, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 72.42429142086857, |
| "learning_rate": 5.763243250684664e-08, |
| "logits/chosen": -0.69140625, |
| "logits/rejected": -0.6875, |
| "logps/chosen": -1.4375, |
| "logps/rejected": -2.484375, |
| "loss": 1.0898, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.0439453125, |
| "rewards/margins": 0.2470703125, |
| "rewards/rejected": -0.291015625, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8354700854700855, |
| "grad_norm": 29.995287539704513, |
| "learning_rate": 5.6204788944245117e-08, |
| "logits/chosen": -0.703125, |
| "logits/rejected": -0.70703125, |
| "logps/chosen": -1.03125, |
| "logps/rejected": -2.1875, |
| "loss": 0.8693, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0478515625, |
| "rewards/margins": 0.14453125, |
| "rewards/rejected": -0.1923828125, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.8376068376068376, |
| "grad_norm": 18.326694038182005, |
| "learning_rate": 5.479350519146523e-08, |
| "logits/chosen": -0.46875, |
| "logits/rejected": -0.56640625, |
| "logps/chosen": -0.78125, |
| "logps/rejected": -1.1015625, |
| "loss": 0.7939, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0576171875, |
| "rewards/margins": 0.080078125, |
| "rewards/rejected": -0.138671875, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.8397435897435898, |
| "grad_norm": 43.4344545311177, |
| "learning_rate": 5.3398659834995075e-08, |
| "logits/chosen": -0.56640625, |
| "logits/rejected": -0.6328125, |
| "logps/chosen": -0.82421875, |
| "logps/rejected": -0.8984375, |
| "loss": 0.9371, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.080078125, |
| "rewards/margins": 0.03173828125, |
| "rewards/rejected": -0.11181640625, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.8418803418803419, |
| "grad_norm": 54.049826393890534, |
| "learning_rate": 5.202033054596012e-08, |
| "logits/chosen": -0.796875, |
| "logits/rejected": -0.703125, |
| "logps/chosen": -1.3671875, |
| "logps/rejected": -1.5, |
| "loss": 0.892, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.0220947265625, |
| "rewards/margins": 0.076171875, |
| "rewards/rejected": -0.0986328125, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.844017094017094, |
| "grad_norm": 20.13102225768583, |
| "learning_rate": 5.0658594075799e-08, |
| "logits/chosen": -0.6796875, |
| "logits/rejected": -0.671875, |
| "logps/chosen": -1.765625, |
| "logps/rejected": -2.875, |
| "loss": 0.8788, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0277099609375, |
| "rewards/margins": 0.06982421875, |
| "rewards/rejected": -0.09765625, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.8461538461538461, |
| "grad_norm": 28.927848143732405, |
| "learning_rate": 4.931352625198872e-08, |
| "logits/chosen": -0.75390625, |
| "logits/rejected": -0.7109375, |
| "logps/chosen": -0.859375, |
| "logps/rejected": -0.7109375, |
| "loss": 0.8744, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.046630859375, |
| "rewards/margins": 0.04052734375, |
| "rewards/rejected": -0.0869140625, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.8482905982905983, |
| "grad_norm": 63.12214875940479, |
| "learning_rate": 4.7985201973823056e-08, |
| "logits/chosen": -0.61328125, |
| "logits/rejected": -0.6171875, |
| "logps/chosen": -0.671875, |
| "logps/rejected": -0.9609375, |
| "loss": 0.9304, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.076171875, |
| "rewards/margins": 0.0400390625, |
| "rewards/rejected": -0.1162109375, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.8504273504273504, |
| "grad_norm": 44.643489074895264, |
| "learning_rate": 4.6673695208241485e-08, |
| "logits/chosen": -0.47265625, |
| "logits/rejected": -0.5546875, |
| "logps/chosen": -0.9140625, |
| "logps/rejected": -1.0546875, |
| "loss": 0.887, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.05322265625, |
| "rewards/margins": 0.0244140625, |
| "rewards/rejected": -0.07763671875, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.8525641025641025, |
| "grad_norm": 39.04412527761531, |
| "learning_rate": 4.53790789857102e-08, |
| "logits/chosen": -0.76171875, |
| "logits/rejected": -0.76171875, |
| "logps/chosen": -1.1796875, |
| "logps/rejected": -1.625, |
| "loss": 0.9543, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0966796875, |
| "rewards/margins": 0.0263671875, |
| "rewards/rejected": -0.123046875, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.8547008547008547, |
| "grad_norm": 27.42452165554953, |
| "learning_rate": 4.41014253961559e-08, |
| "logits/chosen": -0.515625, |
| "logits/rejected": -0.53125, |
| "logps/chosen": -1.078125, |
| "logps/rejected": -1.4453125, |
| "loss": 0.8307, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0810546875, |
| "rewards/margins": 0.083984375, |
| "rewards/rejected": -0.1650390625, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8547008547008547, |
| "eval_logits/chosen": -0.59765625, |
| "eval_logits/rejected": -0.609375, |
| "eval_logps/chosen": -1.1640625, |
| "eval_logps/rejected": -1.265625, |
| "eval_loss": 0.8950571417808533, |
| "eval_rewards/accuracies": 0.7338709831237793, |
| "eval_rewards/chosen": -0.068359375, |
| "eval_rewards/margins": 0.06982421875, |
| "eval_rewards/rejected": -0.138671875, |
| "eval_runtime": 101.1209, |
| "eval_samples_per_second": 19.393, |
| "eval_steps_per_second": 0.613, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8568376068376068, |
| "grad_norm": 33.032240773717284, |
| "learning_rate": 4.2840805584951014e-08, |
| "logits/chosen": -0.48828125, |
| "logits/rejected": -0.48046875, |
| "logps/chosen": -0.796875, |
| "logps/rejected": -0.8359375, |
| "loss": 0.8789, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.07421875, |
| "rewards/margins": 0.04248046875, |
| "rewards/rejected": -0.1162109375, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.8589743589743589, |
| "grad_norm": 31.63013982511431, |
| "learning_rate": 4.159728974895238e-08, |
| "logits/chosen": -0.7578125, |
| "logits/rejected": -0.76953125, |
| "logps/chosen": -0.8828125, |
| "logps/rejected": -0.71875, |
| "loss": 0.8907, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0908203125, |
| "rewards/margins": 0.02392578125, |
| "rewards/rejected": -0.115234375, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.8611111111111112, |
| "grad_norm": 48.908225956606366, |
| "learning_rate": 4.037094713259238e-08, |
| "logits/chosen": -0.640625, |
| "logits/rejected": -0.625, |
| "logps/chosen": -0.65234375, |
| "logps/rejected": -1.640625, |
| "loss": 0.9325, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0341796875, |
| "rewards/margins": -0.00103759765625, |
| "rewards/rejected": -0.032958984375, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.8632478632478633, |
| "grad_norm": 56.81805342384158, |
| "learning_rate": 3.91618460240227e-08, |
| "logits/chosen": -0.53125, |
| "logits/rejected": -0.38671875, |
| "logps/chosen": -0.9140625, |
| "logps/rejected": -1.046875, |
| "loss": 0.9548, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.080078125, |
| "rewards/margins": 0.059814453125, |
| "rewards/rejected": -0.140625, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.8653846153846154, |
| "grad_norm": 36.45563015025386, |
| "learning_rate": 3.797005375131227e-08, |
| "logits/chosen": -0.447265625, |
| "logits/rejected": -0.33984375, |
| "logps/chosen": -0.7734375, |
| "logps/rejected": -0.8203125, |
| "loss": 0.9068, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0703125, |
| "rewards/margins": 0.009521484375, |
| "rewards/rejected": -0.080078125, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.8675213675213675, |
| "grad_norm": 29.66434209928639, |
| "learning_rate": 3.6795636678697766e-08, |
| "logits/chosen": -0.5859375, |
| "logits/rejected": -0.68359375, |
| "logps/chosen": -1.4609375, |
| "logps/rejected": -1.5703125, |
| "loss": 0.8785, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.111328125, |
| "rewards/margins": 0.036865234375, |
| "rewards/rejected": -0.1484375, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.8696581196581197, |
| "grad_norm": 50.48136026330252, |
| "learning_rate": 3.563866020288821e-08, |
| "logits/chosen": -0.625, |
| "logits/rejected": -0.578125, |
| "logps/chosen": -2.46875, |
| "logps/rejected": -2.40625, |
| "loss": 0.9175, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.076171875, |
| "rewards/margins": 0.046875, |
| "rewards/rejected": -0.12353515625, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.8717948717948718, |
| "grad_norm": 31.90288630862614, |
| "learning_rate": 3.449918874942371e-08, |
| "logits/chosen": -0.455078125, |
| "logits/rejected": -0.443359375, |
| "logps/chosen": -1.9296875, |
| "logps/rejected": -1.859375, |
| "loss": 0.9159, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0732421875, |
| "rewards/margins": 0.002899169921875, |
| "rewards/rejected": -0.076171875, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.8739316239316239, |
| "grad_norm": 20.338754487967687, |
| "learning_rate": 3.337728576908747e-08, |
| "logits/chosen": -0.546875, |
| "logits/rejected": -0.44140625, |
| "logps/chosen": -0.6640625, |
| "logps/rejected": -0.68359375, |
| "loss": 0.8912, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.056396484375, |
| "rewards/margins": 0.0205078125, |
| "rewards/rejected": -0.0771484375, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.8760683760683761, |
| "grad_norm": 30.746459120515787, |
| "learning_rate": 3.2273013734373e-08, |
| "logits/chosen": -0.337890625, |
| "logits/rejected": -0.462890625, |
| "logps/chosen": -1.0703125, |
| "logps/rejected": -1.1171875, |
| "loss": 0.8478, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.044921875, |
| "rewards/margins": 0.026123046875, |
| "rewards/rejected": -0.0712890625, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.8782051282051282, |
| "grad_norm": 21.508129319749067, |
| "learning_rate": 3.11864341360052e-08, |
| "logits/chosen": -0.5625, |
| "logits/rejected": -0.5625, |
| "logps/chosen": -1.03125, |
| "logps/rejected": -1.1015625, |
| "loss": 0.8339, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.119140625, |
| "rewards/margins": 0.06689453125, |
| "rewards/rejected": -0.1865234375, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.8803418803418803, |
| "grad_norm": 76.39894216110301, |
| "learning_rate": 3.0117607479516015e-08, |
| "logits/chosen": -0.486328125, |
| "logits/rejected": -0.458984375, |
| "logps/chosen": -0.69140625, |
| "logps/rejected": -0.82421875, |
| "loss": 0.9626, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.045654296875, |
| "rewards/margins": 0.0556640625, |
| "rewards/rejected": -0.1015625, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.8824786324786325, |
| "grad_norm": 35.131873970074196, |
| "learning_rate": 2.9066593281875916e-08, |
| "logits/chosen": -0.81640625, |
| "logits/rejected": -0.7578125, |
| "logps/chosen": -0.98046875, |
| "logps/rejected": -1.25, |
| "loss": 0.8566, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.056396484375, |
| "rewards/margins": 0.025634765625, |
| "rewards/rejected": -0.08203125, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.8846153846153846, |
| "grad_norm": 30.913338192487096, |
| "learning_rate": 2.8033450068178878e-08, |
| "logits/chosen": -0.25, |
| "logits/rejected": -0.1845703125, |
| "logps/chosen": -1.234375, |
| "logps/rejected": -1.375, |
| "loss": 0.8087, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.11328125, |
| "rewards/margins": 0.062255859375, |
| "rewards/rejected": -0.17578125, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.8867521367521367, |
| "grad_norm": 50.75341001418908, |
| "learning_rate": 2.7018235368384134e-08, |
| "logits/chosen": -0.375, |
| "logits/rejected": -0.466796875, |
| "logps/chosen": -1.859375, |
| "logps/rejected": -1.9609375, |
| "loss": 0.9288, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.09375, |
| "rewards/margins": 0.111328125, |
| "rewards/rejected": -0.205078125, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 39.95487638253479, |
| "learning_rate": 2.6021005714112375e-08, |
| "logits/chosen": -0.734375, |
| "logits/rejected": -0.7734375, |
| "logps/chosen": -0.84765625, |
| "logps/rejected": -1.171875, |
| "loss": 0.8656, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.06005859375, |
| "rewards/margins": 0.060546875, |
| "rewards/rejected": -0.12060546875, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.8910256410256411, |
| "grad_norm": 19.38742446704256, |
| "learning_rate": 2.5041816635497703e-08, |
| "logits/chosen": -0.55078125, |
| "logits/rejected": -0.5078125, |
| "logps/chosen": -1.7109375, |
| "logps/rejected": -1.1875, |
| "loss": 0.8466, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.0289306640625, |
| "rewards/margins": 0.078125, |
| "rewards/rejected": -0.04931640625, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.8931623931623932, |
| "grad_norm": 24.217067390108564, |
| "learning_rate": 2.408072265809576e-08, |
| "logits/chosen": -0.5859375, |
| "logits/rejected": -0.546875, |
| "logps/chosen": -0.7109375, |
| "logps/rejected": -0.9375, |
| "loss": 0.8562, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.060546875, |
| "rewards/margins": 0.050537109375, |
| "rewards/rejected": -0.111328125, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.8952991452991453, |
| "grad_norm": 36.092412288856494, |
| "learning_rate": 2.313777729984726e-08, |
| "logits/chosen": -0.423828125, |
| "logits/rejected": -0.302734375, |
| "logps/chosen": -0.75, |
| "logps/rejected": -1.5234375, |
| "loss": 0.9023, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.10693359375, |
| "rewards/margins": 0.1015625, |
| "rewards/rejected": -0.2080078125, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.8974358974358975, |
| "grad_norm": 27.603997759182036, |
| "learning_rate": 2.221303306809788e-08, |
| "logits/chosen": -0.546875, |
| "logits/rejected": -0.55859375, |
| "logps/chosen": -0.6640625, |
| "logps/rejected": -0.734375, |
| "loss": 0.892, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.0732421875, |
| "rewards/margins": 0.0302734375, |
| "rewards/rejected": -0.10400390625, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.8995726495726496, |
| "grad_norm": 26.452313301173593, |
| "learning_rate": 2.1306541456674736e-08, |
| "logits/chosen": -0.5703125, |
| "logits/rejected": -0.5625, |
| "logps/chosen": -0.8515625, |
| "logps/rejected": -0.9765625, |
| "loss": 0.877, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.0225830078125, |
| "rewards/margins": 0.054931640625, |
| "rewards/rejected": -0.07763671875, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.9017094017094017, |
| "grad_norm": 27.150619563751665, |
| "learning_rate": 2.0418352943018497e-08, |
| "logits/chosen": -0.49609375, |
| "logits/rejected": -0.53515625, |
| "logps/chosen": -0.94921875, |
| "logps/rejected": -0.953125, |
| "loss": 0.877, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.1357421875, |
| "rewards/margins": 0.017333984375, |
| "rewards/rejected": -0.1533203125, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.9038461538461539, |
| "grad_norm": 25.600194103146695, |
| "learning_rate": 1.9548516985372982e-08, |
| "logits/chosen": -0.515625, |
| "logits/rejected": -0.47265625, |
| "logps/chosen": -0.66796875, |
| "logps/rejected": -1.046875, |
| "loss": 0.8555, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.057861328125, |
| "rewards/margins": 0.0595703125, |
| "rewards/rejected": -0.1171875, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.905982905982906, |
| "grad_norm": 62.612070797475276, |
| "learning_rate": 1.869708202003093e-08, |
| "logits/chosen": -0.8046875, |
| "logits/rejected": -0.77734375, |
| "logps/chosen": -1.0703125, |
| "logps/rejected": -1.296875, |
| "loss": 0.943, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.055908203125, |
| "rewards/margins": 0.11328125, |
| "rewards/rejected": -0.169921875, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.9081196581196581, |
| "grad_norm": 27.367161811022896, |
| "learning_rate": 1.7864095458636836e-08, |
| "logits/chosen": -0.55078125, |
| "logits/rejected": -0.48046875, |
| "logps/chosen": -0.90625, |
| "logps/rejected": -0.7890625, |
| "loss": 0.8156, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0751953125, |
| "rewards/margins": 0.03564453125, |
| "rewards/rejected": -0.1103515625, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.9102564102564102, |
| "grad_norm": 29.270727979912408, |
| "learning_rate": 1.7049603685546986e-08, |
| "logits/chosen": -0.828125, |
| "logits/rejected": -0.765625, |
| "logps/chosen": -0.7734375, |
| "logps/rejected": -0.7734375, |
| "loss": 0.8855, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.080078125, |
| "rewards/margins": 0.028076171875, |
| "rewards/rejected": -0.10791015625, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.9123931623931624, |
| "grad_norm": 26.620286341772662, |
| "learning_rate": 1.6253652055246357e-08, |
| "logits/chosen": -0.62109375, |
| "logits/rejected": -0.59375, |
| "logps/chosen": -1.1171875, |
| "logps/rejected": -1.125, |
| "loss": 0.8418, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.049560546875, |
| "rewards/margins": 0.04931640625, |
| "rewards/rejected": -0.0986328125, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.9145299145299145, |
| "grad_norm": 31.429203131736777, |
| "learning_rate": 1.5476284889823315e-08, |
| "logits/chosen": -0.455078125, |
| "logits/rejected": -0.55859375, |
| "logps/chosen": -1.0546875, |
| "logps/rejected": -2.609375, |
| "loss": 0.8818, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.03515625, |
| "rewards/margins": 0.125, |
| "rewards/rejected": -0.16015625, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.9166666666666666, |
| "grad_norm": 25.327941068879277, |
| "learning_rate": 1.4717545476501487e-08, |
| "logits/chosen": -0.484375, |
| "logits/rejected": -0.58203125, |
| "logps/chosen": -0.8203125, |
| "logps/rejected": -0.86328125, |
| "loss": 0.8188, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.0830078125, |
| "rewards/margins": 0.056396484375, |
| "rewards/rejected": -0.1396484375, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.9188034188034188, |
| "grad_norm": 36.762939345250295, |
| "learning_rate": 1.3977476065229216e-08, |
| "logits/chosen": -0.70703125, |
| "logits/rejected": -0.66015625, |
| "logps/chosen": -0.8046875, |
| "logps/rejected": -1.171875, |
| "loss": 0.8722, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.07177734375, |
| "rewards/margins": -3.0517578125e-05, |
| "rewards/rejected": -0.07177734375, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9209401709401709, |
| "grad_norm": 47.33246317581902, |
| "learning_rate": 1.3256117866327116e-08, |
| "logits/chosen": -0.6015625, |
| "logits/rejected": -0.6328125, |
| "logps/chosen": -1.78125, |
| "logps/rejected": -1.125, |
| "loss": 0.9324, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.107421875, |
| "rewards/margins": 0.041015625, |
| "rewards/rejected": -0.1484375, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 37.81514750899869, |
| "learning_rate": 1.2553511048193044e-08, |
| "logits/chosen": -0.65625, |
| "logits/rejected": -0.5625, |
| "logps/chosen": -1.015625, |
| "logps/rejected": -1.1484375, |
| "loss": 0.884, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.07080078125, |
| "rewards/margins": 0.037353515625, |
| "rewards/rejected": -0.10791015625, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.9252136752136753, |
| "grad_norm": 66.29958051188179, |
| "learning_rate": 1.1869694735065606e-08, |
| "logits/chosen": -0.66015625, |
| "logits/rejected": -0.61328125, |
| "logps/chosen": -0.765625, |
| "logps/rejected": -0.7734375, |
| "loss": 0.9717, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.0634765625, |
| "rewards/margins": 0.025390625, |
| "rewards/rejected": -0.0888671875, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.9273504273504274, |
| "grad_norm": 113.35297326249436, |
| "learning_rate": 1.1204707004845316e-08, |
| "logits/chosen": -0.5546875, |
| "logits/rejected": -0.64453125, |
| "logps/chosen": -1.0078125, |
| "logps/rejected": -0.9609375, |
| "loss": 0.879, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.1279296875, |
| "rewards/margins": 0.0322265625, |
| "rewards/rejected": -0.16015625, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.9294871794871795, |
| "grad_norm": 21.10807579120109, |
| "learning_rate": 1.0558584886974482e-08, |
| "logits/chosen": -0.421875, |
| "logits/rejected": -0.37890625, |
| "logps/chosen": -0.98046875, |
| "logps/rejected": -0.8515625, |
| "loss": 0.8398, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0162353515625, |
| "rewards/margins": 0.072265625, |
| "rewards/rejected": -0.08837890625, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.9316239316239316, |
| "grad_norm": 25.73470244065982, |
| "learning_rate": 9.931364360375016e-09, |
| "logits/chosen": -0.5546875, |
| "logits/rejected": -0.57421875, |
| "logps/chosen": -1.46875, |
| "logps/rejected": -2.0, |
| "loss": 0.8243, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.07080078125, |
| "rewards/margins": 0.04296875, |
| "rewards/rejected": -0.11376953125, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.9337606837606838, |
| "grad_norm": 38.22779506686182, |
| "learning_rate": 9.323080351445167e-09, |
| "logits/chosen": -0.49609375, |
| "logits/rejected": -0.65234375, |
| "logps/chosen": -1.4609375, |
| "logps/rejected": -1.65625, |
| "loss": 0.8678, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.05517578125, |
| "rewards/margins": 0.09326171875, |
| "rewards/rejected": -0.1484375, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.9358974358974359, |
| "grad_norm": 31.173946501797236, |
| "learning_rate": 8.733766732114484e-09, |
| "logits/chosen": -0.78515625, |
| "logits/rejected": -0.7578125, |
| "logps/chosen": -0.9921875, |
| "logps/rejected": -1.09375, |
| "loss": 0.8744, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.126953125, |
| "rewards/margins": 0.019775390625, |
| "rewards/rejected": -0.1474609375, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.938034188034188, |
| "grad_norm": 42.95244579752985, |
| "learning_rate": 8.163456317957856e-09, |
| "logits/chosen": -0.3203125, |
| "logits/rejected": -0.326171875, |
| "logps/chosen": -0.63671875, |
| "logps/rejected": -0.703125, |
| "loss": 0.8615, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.06103515625, |
| "rewards/margins": 0.048828125, |
| "rewards/rejected": -0.10986328125, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.9401709401709402, |
| "grad_norm": 27.187695525499535, |
| "learning_rate": 7.612180866367995e-09, |
| "logits/chosen": -0.43359375, |
| "logits/rejected": -0.45703125, |
| "logps/chosen": -0.7734375, |
| "logps/rejected": -0.796875, |
| "loss": 0.8594, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0849609375, |
| "rewards/margins": 0.019775390625, |
| "rewards/rejected": -0.1044921875, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.9423076923076923, |
| "grad_norm": 40.38489726441983, |
| "learning_rate": 7.079971074787322e-09, |
| "logits/chosen": -0.47265625, |
| "logits/rejected": -0.427734375, |
| "logps/chosen": -1.40625, |
| "logps/rejected": -0.859375, |
| "loss": 0.9238, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.032958984375, |
| "rewards/margins": 0.050048828125, |
| "rewards/rejected": -0.0830078125, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.9444444444444444, |
| "grad_norm": 90.14195771853652, |
| "learning_rate": 6.5668565789983036e-09, |
| "logits/chosen": -0.66015625, |
| "logits/rejected": -0.62109375, |
| "logps/chosen": -3.09375, |
| "logps/rejected": -1.1875, |
| "loss": 0.929, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.0169677734375, |
| "rewards/margins": 0.09912109375, |
| "rewards/rejected": -0.08251953125, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.9465811965811965, |
| "grad_norm": 36.27940783870119, |
| "learning_rate": 6.072865951473316e-09, |
| "logits/chosen": -0.6484375, |
| "logits/rejected": -0.65625, |
| "logps/chosen": -0.91015625, |
| "logps/rejected": -0.8671875, |
| "loss": 0.927, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0908203125, |
| "rewards/margins": 0.01611328125, |
| "rewards/rejected": -0.10693359375, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.9487179487179487, |
| "grad_norm": 39.72454819271685, |
| "learning_rate": 5.59802669978377e-09, |
| "logits/chosen": -0.5390625, |
| "logits/rejected": -0.609375, |
| "logps/chosen": -2.234375, |
| "logps/rejected": -1.7109375, |
| "loss": 0.8793, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.11083984375, |
| "rewards/margins": -0.0004425048828125, |
| "rewards/rejected": -0.1103515625, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.9508547008547008, |
| "grad_norm": 34.77865009821792, |
| "learning_rate": 5.142365265068022e-09, |
| "logits/chosen": -0.58984375, |
| "logits/rejected": -0.6015625, |
| "logps/chosen": -0.83984375, |
| "logps/rejected": -0.7890625, |
| "loss": 0.9043, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.087890625, |
| "rewards/margins": 0.01446533203125, |
| "rewards/rejected": -0.1025390625, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.9529914529914529, |
| "grad_norm": 29.280675486054747, |
| "learning_rate": 4.705907020559363e-09, |
| "logits/chosen": -0.515625, |
| "logits/rejected": -0.625, |
| "logps/chosen": -0.8515625, |
| "logps/rejected": -1.484375, |
| "loss": 0.8662, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.050537109375, |
| "rewards/margins": 0.0206298828125, |
| "rewards/rejected": -0.0712890625, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.9551282051282052, |
| "grad_norm": 19.018888833187862, |
| "learning_rate": 4.288676270172959e-09, |
| "logits/chosen": -0.62109375, |
| "logits/rejected": -0.6015625, |
| "logps/chosen": -0.7421875, |
| "logps/rejected": -1.03125, |
| "loss": 0.8485, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0595703125, |
| "rewards/margins": 0.06591796875, |
| "rewards/rejected": -0.1259765625, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.9572649572649573, |
| "grad_norm": 44.77098877511915, |
| "learning_rate": 3.890696247152425e-09, |
| "logits/chosen": -0.57421875, |
| "logits/rejected": -0.6484375, |
| "logps/chosen": -1.8984375, |
| "logps/rejected": -1.6015625, |
| "loss": 0.8834, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0703125, |
| "rewards/margins": 0.0279541015625, |
| "rewards/rejected": -0.09814453125, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.9594017094017094, |
| "grad_norm": 37.310355068897586, |
| "learning_rate": 3.5119891127762592e-09, |
| "logits/chosen": -0.53515625, |
| "logits/rejected": -0.458984375, |
| "logps/chosen": -0.97265625, |
| "logps/rejected": -1.0, |
| "loss": 0.9282, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.10107421875, |
| "rewards/margins": 0.0152587890625, |
| "rewards/rejected": -0.1162109375, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.9615384615384616, |
| "grad_norm": 33.412721023215504, |
| "learning_rate": 3.1525759551237485e-09, |
| "logits/chosen": -0.6484375, |
| "logits/rejected": -0.6171875, |
| "logps/chosen": -0.66796875, |
| "logps/rejected": -0.66015625, |
| "loss": 0.8937, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.076171875, |
| "rewards/margins": 0.02001953125, |
| "rewards/rejected": -0.0966796875, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.9636752136752137, |
| "grad_norm": 25.449096875471138, |
| "learning_rate": 2.8124767879005752e-09, |
| "logits/chosen": -0.71484375, |
| "logits/rejected": -0.765625, |
| "logps/chosen": -0.87109375, |
| "logps/rejected": -1.4765625, |
| "loss": 0.8815, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0771484375, |
| "rewards/margins": 0.031005859375, |
| "rewards/rejected": -0.10791015625, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.9658119658119658, |
| "grad_norm": 26.9546143729673, |
| "learning_rate": 2.491710549324644e-09, |
| "logits/chosen": -0.4296875, |
| "logits/rejected": -0.470703125, |
| "logps/chosen": -0.984375, |
| "logps/rejected": -1.78125, |
| "loss": 0.8539, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.06005859375, |
| "rewards/margins": 0.046142578125, |
| "rewards/rejected": -0.10595703125, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.967948717948718, |
| "grad_norm": 21.60271449407226, |
| "learning_rate": 2.190295101071171e-09, |
| "logits/chosen": -0.5234375, |
| "logits/rejected": -0.5078125, |
| "logps/chosen": -0.7578125, |
| "logps/rejected": -0.9921875, |
| "loss": 0.8593, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0810546875, |
| "rewards/margins": 0.037109375, |
| "rewards/rejected": -0.1181640625, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.9700854700854701, |
| "grad_norm": 21.38516131392487, |
| "learning_rate": 1.9082472272783146e-09, |
| "logits/chosen": -0.40234375, |
| "logits/rejected": -0.4921875, |
| "logps/chosen": -0.7890625, |
| "logps/rejected": -0.80078125, |
| "loss": 0.8829, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.06640625, |
| "rewards/margins": 0.018310546875, |
| "rewards/rejected": -0.0849609375, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.9722222222222222, |
| "grad_norm": 81.83247110230516, |
| "learning_rate": 1.6455826336124857e-09, |
| "logits/chosen": -0.7578125, |
| "logits/rejected": -0.671875, |
| "logps/chosen": -1.453125, |
| "logps/rejected": -1.59375, |
| "loss": 0.9213, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.064453125, |
| "rewards/margins": 0.0260009765625, |
| "rewards/rejected": -0.09033203125, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.9743589743589743, |
| "grad_norm": 60.332814313074266, |
| "learning_rate": 1.4023159463938173e-09, |
| "logits/chosen": -0.6015625, |
| "logits/rejected": -0.6484375, |
| "logps/chosen": -0.828125, |
| "logps/rejected": -0.91796875, |
| "loss": 0.9134, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.06494140625, |
| "rewards/margins": 0.036376953125, |
| "rewards/rejected": -0.1015625, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.9764957264957265, |
| "grad_norm": 49.45626820858851, |
| "learning_rate": 1.178460711781587e-09, |
| "logits/chosen": -0.65625, |
| "logits/rejected": -0.703125, |
| "logps/chosen": -0.85546875, |
| "logps/rejected": -1.1484375, |
| "loss": 0.869, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.052490234375, |
| "rewards/margins": 0.07421875, |
| "rewards/rejected": -0.126953125, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.9786324786324786, |
| "grad_norm": 42.320706874764625, |
| "learning_rate": 9.74029395020065e-10, |
| "logits/chosen": -0.58984375, |
| "logits/rejected": -0.69921875, |
| "logps/chosen": -0.8671875, |
| "logps/rejected": -1.2265625, |
| "loss": 0.9081, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0625, |
| "rewards/margins": 0.061767578125, |
| "rewards/rejected": -0.12451171875, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.9807692307692307, |
| "grad_norm": 35.68338029015088, |
| "learning_rate": 7.890333797442805e-10, |
| "logits/chosen": -0.57421875, |
| "logits/rejected": -0.61328125, |
| "logps/chosen": -0.7578125, |
| "logps/rejected": -0.86328125, |
| "loss": 0.8683, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.0771484375, |
| "rewards/margins": 0.051513671875, |
| "rewards/rejected": -0.12890625, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.9829059829059829, |
| "grad_norm": 40.17334625901446, |
| "learning_rate": 6.234829673462505e-10, |
| "logits/chosen": -0.5546875, |
| "logits/rejected": -0.490234375, |
| "logps/chosen": -1.171875, |
| "logps/rejected": -1.578125, |
| "loss": 0.8818, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.09228515625, |
| "rewards/margins": 0.04931640625, |
| "rewards/rejected": -0.1416015625, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.9850427350427351, |
| "grad_norm": 46.29329122227152, |
| "learning_rate": 4.773873764012059e-10, |
| "logits/chosen": -0.6953125, |
| "logits/rejected": -0.80859375, |
| "logps/chosen": -0.80078125, |
| "logps/rejected": -1.078125, |
| "loss": 0.9723, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.07421875, |
| "rewards/margins": 0.0106201171875, |
| "rewards/rejected": -0.0849609375, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.9871794871794872, |
| "grad_norm": 38.67355799271119, |
| "learning_rate": 3.507547421543966e-10, |
| "logits/chosen": -0.251953125, |
| "logits/rejected": -0.333984375, |
| "logps/chosen": -1.09375, |
| "logps/rejected": -0.79296875, |
| "loss": 0.9374, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.09326171875, |
| "rewards/margins": 0.021484375, |
| "rewards/rejected": -0.11474609375, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.9893162393162394, |
| "grad_norm": 25.110448028534133, |
| "learning_rate": 2.435921160678922e-10, |
| "logits/chosen": -0.74609375, |
| "logits/rejected": -0.765625, |
| "logps/chosen": -1.078125, |
| "logps/rejected": -1.09375, |
| "loss": 0.8717, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.0751953125, |
| "rewards/margins": 0.0458984375, |
| "rewards/rejected": -0.12109375, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.9914529914529915, |
| "grad_norm": 33.231328715847084, |
| "learning_rate": 1.559054654281966e-10, |
| "logits/chosen": -0.490234375, |
| "logits/rejected": -0.47265625, |
| "logps/chosen": -1.3125, |
| "logps/rejected": -1.53125, |
| "loss": 0.9052, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.1337890625, |
| "rewards/margins": 0.0252685546875, |
| "rewards/rejected": -0.1591796875, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.9935897435897436, |
| "grad_norm": 21.045630521161826, |
| "learning_rate": 8.769967301381909e-11, |
| "logits/chosen": -0.66015625, |
| "logits/rejected": -0.66015625, |
| "logps/chosen": -0.9296875, |
| "logps/rejected": -1.046875, |
| "loss": 0.8311, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.10498046875, |
| "rewards/margins": 0.0595703125, |
| "rewards/rejected": -0.1650390625, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.9957264957264957, |
| "grad_norm": 18.01335853415681, |
| "learning_rate": 3.8978536823230934e-11, |
| "logits/chosen": -0.2734375, |
| "logits/rejected": -0.333984375, |
| "logps/chosen": -1.234375, |
| "logps/rejected": -1.8671875, |
| "loss": 0.8609, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.04052734375, |
| "rewards/margins": 0.1318359375, |
| "rewards/rejected": -0.171875, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.9978632478632479, |
| "grad_norm": 48.25462426916188, |
| "learning_rate": 9.744769863712088e-12, |
| "logits/chosen": -0.609375, |
| "logits/rejected": -0.5625, |
| "logps/chosen": -0.8203125, |
| "logps/rejected": -0.9140625, |
| "loss": 0.8513, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.08984375, |
| "rewards/margins": 0.059326171875, |
| "rewards/rejected": -0.1494140625, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 31.93733299063659, |
| "learning_rate": 0.0, |
| "logits/chosen": -0.6875, |
| "logits/rejected": -0.72265625, |
| "logps/chosen": -0.80859375, |
| "logps/rejected": -1.1875, |
| "loss": 0.887, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.1064453125, |
| "rewards/margins": 0.0274658203125, |
| "rewards/rejected": -0.1337890625, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 468, |
| "total_flos": 0.0, |
| "train_loss": 0.9510387192424546, |
| "train_runtime": 8444.0036, |
| "train_samples_per_second": 7.091, |
| "train_steps_per_second": 0.055 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 468, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|