| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 63, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.016, |
| "grad_norm": 10.069208002389907, |
| "learning_rate": 2e-05, |
| "logits/chosen": 1.40966796875, |
| "logits/rejected": 1.90234375, |
| "logps/chosen": -202.875, |
| "logps/rejected": -39.71875, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 12.366009999221836, |
| "learning_rate": 4e-05, |
| "logits/chosen": 1.37548828125, |
| "logits/rejected": 2.0009765625, |
| "logps/chosen": -251.25, |
| "logps/rejected": -40.09375, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 0.7217222726873707, |
| "learning_rate": 6e-05, |
| "logits/chosen": 2.0615234375, |
| "logits/rejected": 1.708984375, |
| "logps/chosen": -253.8125, |
| "logps/rejected": -70.875, |
| "loss": 0.0181, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.26220703125, |
| "rewards/margins": 4.46875, |
| "rewards/rejected": -3.208984375, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 0.9046680440747444, |
| "learning_rate": 8e-05, |
| "logits/chosen": 1.35107421875, |
| "logits/rejected": 1.4130859375, |
| "logps/chosen": -194.25, |
| "logps/rejected": -140.25, |
| "loss": 0.0225, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.402099609375, |
| "rewards/margins": 10.078125, |
| "rewards/rejected": -9.6875, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 19.759479094786197, |
| "learning_rate": 0.0001, |
| "logits/chosen": 0.591796875, |
| "logits/rejected": 0.6298599243164062, |
| "logps/chosen": -431.25, |
| "logps/rejected": -201.25, |
| "loss": 0.7488, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -12.28125, |
| "rewards/margins": 3.73828125, |
| "rewards/rejected": -16.046875, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 1.0315492671962288e-06, |
| "learning_rate": 0.00012, |
| "logits/chosen": 2.171875, |
| "logits/rejected": 0.79638671875, |
| "logps/chosen": -320.375, |
| "logps/rejected": -269.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.7626953125, |
| "rewards/margins": 20.34375, |
| "rewards/rejected": -23.15625, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 0.0007913850356246983, |
| "learning_rate": 0.00014, |
| "logits/chosen": 2.4765625, |
| "logits/rejected": 1.0927734375, |
| "logps/chosen": -311.5, |
| "logps/rejected": -300.5, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.390625, |
| "rewards/margins": 18.8203125, |
| "rewards/rejected": -26.234375, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 3.55577097734532, |
| "learning_rate": 0.00016, |
| "logits/chosen": 2.388671875, |
| "logits/rejected": 1.0126953125, |
| "logps/chosen": -396.25, |
| "logps/rejected": -314.0, |
| "loss": 0.0956, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -14.8359375, |
| "rewards/margins": 12.71875, |
| "rewards/rejected": -27.546875, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 1.1632494807287445e-10, |
| "learning_rate": 0.00018, |
| "logits/chosen": 3.025390625, |
| "logits/rejected": 0.689453125, |
| "logps/chosen": -277.0, |
| "logps/rejected": -431.75, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.91796875, |
| "rewards/margins": 33.296875, |
| "rewards/rejected": -39.28125, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.62273040339937e-05, |
| "learning_rate": 0.0002, |
| "logits/chosen": 3.20703125, |
| "logits/rejected": 0.896484375, |
| "logps/chosen": -288.25, |
| "logps/rejected": -448.5, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -10.029296875, |
| "rewards/margins": 30.765625, |
| "rewards/rejected": -40.75, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 29.485616401220902, |
| "learning_rate": 0.00019622641509433963, |
| "logits/chosen": 3.3828125, |
| "logits/rejected": 1.173828125, |
| "logps/chosen": -562.375, |
| "logps/rejected": -450.75, |
| "loss": 0.7779, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -28.53125, |
| "rewards/margins": 12.4609375, |
| "rewards/rejected": -40.9375, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 1.934753970465984e-12, |
| "learning_rate": 0.00019245283018867927, |
| "logits/chosen": 3.322265625, |
| "logits/rejected": 0.859375, |
| "logps/chosen": -412.25, |
| "logps/rejected": -626.5, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -17.078125, |
| "rewards/margins": 41.640625, |
| "rewards/rejected": -58.75, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 12.013076102617397, |
| "learning_rate": 0.00018867924528301889, |
| "logits/chosen": 2.99609375, |
| "logits/rejected": 0.555908203125, |
| "logps/chosen": -595.375, |
| "logps/rejected": -626.5, |
| "loss": 0.5391, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -32.6640625, |
| "rewards/margins": 25.96875, |
| "rewards/rejected": -58.625, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 7.6760618149121e-10, |
| "learning_rate": 0.0001849056603773585, |
| "logits/chosen": 2.908203125, |
| "logits/rejected": 2.189453125, |
| "logps/chosen": -475.25, |
| "logps/rejected": -759.5, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -24.125, |
| "rewards/margins": 47.96875, |
| "rewards/rejected": -72.0, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 890.1553140771243, |
| "learning_rate": 0.00018113207547169812, |
| "logits/chosen": 3.0703125, |
| "logits/rejected": 2.83203125, |
| "logps/chosen": -1423.0, |
| "logps/rejected": -737.0, |
| "loss": 57.7188, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -117.1875, |
| "rewards/margins": -48.046875, |
| "rewards/rejected": -68.90625, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 2.3766667756398814e-08, |
| "learning_rate": 0.00017735849056603776, |
| "logits/chosen": 4.076171875, |
| "logits/rejected": 3.078125, |
| "logps/chosen": -521.75, |
| "logps/rejected": -752.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -27.234375, |
| "rewards/margins": 43.96875, |
| "rewards/rejected": -71.125, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 3.6434206196266317e-06, |
| "learning_rate": 0.00017358490566037738, |
| "logits/chosen": 4.32421875, |
| "logits/rejected": 3.45703125, |
| "logps/chosen": -449.0, |
| "logps/rejected": -740.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -24.625, |
| "rewards/margins": 44.890625, |
| "rewards/rejected": -69.3125, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.0001148921285248143, |
| "learning_rate": 0.000169811320754717, |
| "logits/chosen": 4.5546875, |
| "logits/rejected": 3.6796875, |
| "logps/chosen": -527.0, |
| "logps/rejected": -753.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -29.4140625, |
| "rewards/margins": 41.578125, |
| "rewards/rejected": -71.0, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 14.827452248587537, |
| "learning_rate": 0.0001660377358490566, |
| "logits/chosen": 4.703125, |
| "logits/rejected": 4.08203125, |
| "logps/chosen": -643.25, |
| "logps/rejected": -741.5, |
| "loss": 0.4417, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -39.75, |
| "rewards/margins": 30.328125, |
| "rewards/rejected": -70.0625, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.2454827573591818e-18, |
| "learning_rate": 0.00016226415094339625, |
| "logits/chosen": 2.88671875, |
| "logits/rejected": 3.017578125, |
| "logps/chosen": -506.75, |
| "logps/rejected": -834.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -21.97265625, |
| "rewards/margins": 57.4375, |
| "rewards/rejected": -79.5, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 7.951357148069105e-13, |
| "learning_rate": 0.00015849056603773587, |
| "logits/chosen": 1.654296875, |
| "logits/rejected": 2.53125, |
| "logps/chosen": -409.0, |
| "logps/rejected": -849.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -21.46484375, |
| "rewards/margins": 59.28125, |
| "rewards/rejected": -80.875, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.002673619933126348, |
| "learning_rate": 0.0001547169811320755, |
| "logits/chosen": 0.790283203125, |
| "logits/rejected": 2.2431640625, |
| "logps/chosen": -725.0, |
| "logps/rejected": -822.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -43.8125, |
| "rewards/margins": 33.734375, |
| "rewards/rejected": -77.625, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 28.803556786399305, |
| "learning_rate": 0.0001509433962264151, |
| "logits/chosen": 0.34479522705078125, |
| "logits/rejected": 2.234375, |
| "logps/chosen": -807.5, |
| "logps/rejected": -829.0, |
| "loss": 0.737, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -52.953125, |
| "rewards/margins": 25.828125, |
| "rewards/rejected": -78.75, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 6.616760062152765e-18, |
| "learning_rate": 0.00014716981132075472, |
| "logits/chosen": 1.5146484375, |
| "logits/rejected": 1.8779296875, |
| "logps/chosen": -495.75, |
| "logps/rejected": -890.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -22.546875, |
| "rewards/margins": 62.40625, |
| "rewards/rejected": -85.0625, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 5.053256029955234e-22, |
| "learning_rate": 0.00014339622641509434, |
| "logits/chosen": 1.84375, |
| "logits/rejected": 1.8896484375, |
| "logps/chosen": -420.25, |
| "logps/rejected": -919.5, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -20.05078125, |
| "rewards/margins": 67.875, |
| "rewards/rejected": -87.9375, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 2.9563266933229072e-15, |
| "learning_rate": 0.00013962264150943395, |
| "logits/chosen": 2.0087890625, |
| "logits/rejected": 1.9580078125, |
| "logps/chosen": -558.0, |
| "logps/rejected": -932.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -29.9375, |
| "rewards/margins": 59.375, |
| "rewards/rejected": -89.1875, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 1.477158681297205e-12, |
| "learning_rate": 0.0001358490566037736, |
| "logits/chosen": 2.072265625, |
| "logits/rejected": 2.03515625, |
| "logps/chosen": -596.5, |
| "logps/rejected": -940.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -33.125, |
| "rewards/margins": 56.96875, |
| "rewards/rejected": -90.1875, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 1.1066552941906446e-16, |
| "learning_rate": 0.0001320754716981132, |
| "logits/chosen": 2.1240234375, |
| "logits/rejected": 2.109375, |
| "logps/chosen": -408.0, |
| "logps/rejected": -944.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -22.03125, |
| "rewards/margins": 68.375, |
| "rewards/rejected": -90.375, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 9.271918418710529e-10, |
| "learning_rate": 0.00012830188679245283, |
| "logits/chosen": 2.138671875, |
| "logits/rejected": 2.205078125, |
| "logps/chosen": -733.5, |
| "logps/rejected": -941.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -43.875, |
| "rewards/margins": 46.34375, |
| "rewards/rejected": -90.1875, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.8897943489643426e-08, |
| "learning_rate": 0.00012452830188679244, |
| "logits/chosen": 2.1201171875, |
| "logits/rejected": 2.29296875, |
| "logps/chosen": -630.5, |
| "logps/rejected": -936.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -35.59375, |
| "rewards/margins": 53.625, |
| "rewards/rejected": -89.1875, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 3.4735316128098106e-10, |
| "learning_rate": 0.00012075471698113207, |
| "logits/chosen": 2.0654296875, |
| "logits/rejected": 2.3076171875, |
| "logps/chosen": -639.0, |
| "logps/rejected": -939.5, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -36.546875, |
| "rewards/margins": 52.625, |
| "rewards/rejected": -89.375, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 2.30972605929251e-07, |
| "learning_rate": 0.0001169811320754717, |
| "logits/chosen": 2.197265625, |
| "logits/rejected": 2.4921875, |
| "logps/chosen": -516.25, |
| "logps/rejected": -920.5, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -29.734375, |
| "rewards/margins": 58.25, |
| "rewards/rejected": -88.0, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 5.090800553960521e-08, |
| "learning_rate": 0.00011320754716981132, |
| "logits/chosen": 2.0634765625, |
| "logits/rejected": 2.587890625, |
| "logps/chosen": -614.0, |
| "logps/rejected": -912.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -36.8359375, |
| "rewards/margins": 50.3125, |
| "rewards/rejected": -87.125, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 7.860715190818793e-09, |
| "learning_rate": 0.00010943396226415095, |
| "logits/chosen": 2.130859375, |
| "logits/rejected": 2.65234375, |
| "logps/chosen": -661.0, |
| "logps/rejected": -904.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -39.125, |
| "rewards/margins": 47.484375, |
| "rewards/rejected": -86.5, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.14310502335505565, |
| "learning_rate": 0.00010566037735849057, |
| "logits/chosen": 2.1650390625, |
| "logits/rejected": 2.712890625, |
| "logps/chosen": -625.5, |
| "logps/rejected": -895.5, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -37.859375, |
| "rewards/margins": 47.5625, |
| "rewards/rejected": -85.375, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 2.342334837346181e-10, |
| "learning_rate": 0.0001018867924528302, |
| "logits/chosen": 1.9853515625, |
| "logits/rejected": 2.67578125, |
| "logps/chosen": -549.25, |
| "logps/rejected": -891.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -31.53125, |
| "rewards/margins": 53.4375, |
| "rewards/rejected": -85.0, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.3363564536472393, |
| "learning_rate": 9.811320754716981e-05, |
| "logits/chosen": 1.845703125, |
| "logits/rejected": 2.556640625, |
| "logps/chosen": -494.25, |
| "logps/rejected": -847.5, |
| "loss": 0.0096, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -28.6015625, |
| "rewards/margins": 51.90625, |
| "rewards/rejected": -80.5, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 1.3263620018804851e-11, |
| "learning_rate": 9.433962264150944e-05, |
| "logits/chosen": 1.615234375, |
| "logits/rejected": 2.5078125, |
| "logps/chosen": -561.25, |
| "logps/rejected": -882.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -30.765625, |
| "rewards/margins": 53.28125, |
| "rewards/rejected": -84.0, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 1.0577545951792833, |
| "learning_rate": 9.056603773584906e-05, |
| "logits/chosen": 1.38671875, |
| "logits/rejected": 2.419921875, |
| "logps/chosen": -612.75, |
| "logps/rejected": -878.5, |
| "loss": 0.0079, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -36.984375, |
| "rewards/margins": 46.96875, |
| "rewards/rejected": -84.0625, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 3.7701859084434684e-16, |
| "learning_rate": 8.679245283018869e-05, |
| "logits/chosen": 0.78564453125, |
| "logits/rejected": 1.27978515625, |
| "logps/chosen": -524.25, |
| "logps/rejected": -960.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -29.9765625, |
| "rewards/margins": 60.71875, |
| "rewards/rejected": -90.8125, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 2.01326547033749e-15, |
| "learning_rate": 8.30188679245283e-05, |
| "logits/chosen": 0.541259765625, |
| "logits/rejected": 1.248046875, |
| "logps/chosen": -575.0, |
| "logps/rejected": -967.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -30.984375, |
| "rewards/margins": 61.8125, |
| "rewards/rejected": -92.8125, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 9.322472149670949e-15, |
| "learning_rate": 7.924528301886794e-05, |
| "logits/chosen": 0.2445068359375, |
| "logits/rejected": 1.1640625, |
| "logps/chosen": -507.25, |
| "logps/rejected": -960.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -27.421875, |
| "rewards/margins": 64.65625, |
| "rewards/rejected": -92.125, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 7.769637270952163e-08, |
| "learning_rate": 7.547169811320755e-05, |
| "logits/chosen": -0.0588526725769043, |
| "logits/rejected": 1.035888671875, |
| "logps/chosen": -499.75, |
| "logps/rejected": -1004.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -29.546875, |
| "rewards/margins": 66.328125, |
| "rewards/rejected": -95.875, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 4.25507341439961e-09, |
| "learning_rate": 7.169811320754717e-05, |
| "logits/chosen": -0.14769744873046875, |
| "logits/rejected": 1.09765625, |
| "logps/chosen": -733.25, |
| "logps/rejected": -949.5, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -43.71875, |
| "rewards/margins": 47.25, |
| "rewards/rejected": -91.0, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 7.60966895891367e-15, |
| "learning_rate": 6.79245283018868e-05, |
| "logits/chosen": -0.33404541015625, |
| "logits/rejected": 1.099609375, |
| "logps/chosen": -505.5, |
| "logps/rejected": -944.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -29.4609375, |
| "rewards/margins": 60.8125, |
| "rewards/rejected": -90.25, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 92.0641578664817, |
| "learning_rate": 6.415094339622641e-05, |
| "logits/chosen": -0.5057373046875, |
| "logits/rejected": 1.1044921875, |
| "logps/chosen": -745.25, |
| "logps/rejected": -936.0, |
| "loss": 1.5391, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -48.71875, |
| "rewards/margins": 40.84375, |
| "rewards/rejected": -89.5625, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 0.002803042076571359, |
| "learning_rate": 6.037735849056604e-05, |
| "logits/chosen": -0.39910888671875, |
| "logits/rejected": 0.93603515625, |
| "logps/chosen": -528.0, |
| "logps/rejected": -915.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -30.0546875, |
| "rewards/margins": 57.125, |
| "rewards/rejected": -87.0625, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 5.448283044130183e-16, |
| "learning_rate": 5.660377358490566e-05, |
| "logits/chosen": -0.558837890625, |
| "logits/rejected": 1.12890625, |
| "logps/chosen": -529.5, |
| "logps/rejected": -944.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -28.7890625, |
| "rewards/margins": 61.96875, |
| "rewards/rejected": -90.8125, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 1.6427966944713224e-12, |
| "learning_rate": 5.283018867924528e-05, |
| "logits/chosen": -0.394775390625, |
| "logits/rejected": 1.146484375, |
| "logps/chosen": -676.25, |
| "logps/rejected": -947.5, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -38.453125, |
| "rewards/margins": 52.25, |
| "rewards/rejected": -90.75, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 3.810907085088454e-16, |
| "learning_rate": 4.9056603773584906e-05, |
| "logits/chosen": -0.5797119140625, |
| "logits/rejected": 1.154296875, |
| "logps/chosen": -526.0, |
| "logps/rejected": -946.5, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -29.6640625, |
| "rewards/margins": 61.1875, |
| "rewards/rejected": -90.875, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 6.364667179704401e-12, |
| "learning_rate": 4.528301886792453e-05, |
| "logits/chosen": -0.573974609375, |
| "logits/rejected": 1.1572265625, |
| "logps/chosen": -588.5, |
| "logps/rejected": -948.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -33.515625, |
| "rewards/margins": 57.21875, |
| "rewards/rejected": -90.8125, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 1.4796603970496823e-15, |
| "learning_rate": 4.150943396226415e-05, |
| "logits/chosen": -0.509521484375, |
| "logits/rejected": 1.1650390625, |
| "logps/chosen": -624.75, |
| "logps/rejected": -946.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -34.671875, |
| "rewards/margins": 56.1875, |
| "rewards/rejected": -90.9375, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 2.3490727781381015e-13, |
| "learning_rate": 3.7735849056603776e-05, |
| "logits/chosen": -0.517822265625, |
| "logits/rejected": 1.177734375, |
| "logps/chosen": -665.625, |
| "logps/rejected": -949.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -37.21875, |
| "rewards/margins": 53.5, |
| "rewards/rejected": -90.6875, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 2.3715956941641845e-14, |
| "learning_rate": 3.39622641509434e-05, |
| "logits/chosen": -0.52734375, |
| "logits/rejected": 1.181640625, |
| "logps/chosen": -669.5, |
| "logps/rejected": -946.5, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -39.078125, |
| "rewards/margins": 51.59375, |
| "rewards/rejected": -90.8125, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 3.890201643859771e-08, |
| "learning_rate": 3.018867924528302e-05, |
| "logits/chosen": -0.495361328125, |
| "logits/rejected": 1.17578125, |
| "logps/chosen": -729.5, |
| "logps/rejected": -948.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -44.0625, |
| "rewards/margins": 46.671875, |
| "rewards/rejected": -91.0, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 8.799502093565177e-14, |
| "learning_rate": 2.641509433962264e-05, |
| "logits/chosen": -0.48876953125, |
| "logits/rejected": 1.1767578125, |
| "logps/chosen": -607.25, |
| "logps/rejected": -950.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -34.78125, |
| "rewards/margins": 56.15625, |
| "rewards/rejected": -90.9375, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 8.933140987778416e-16, |
| "learning_rate": 2.2641509433962265e-05, |
| "logits/chosen": -0.56591796875, |
| "logits/rejected": 1.1875, |
| "logps/chosen": -461.75, |
| "logps/rejected": -950.5, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -27.2734375, |
| "rewards/margins": 63.78125, |
| "rewards/rejected": -91.0, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 6.596288118090753e-11, |
| "learning_rate": 1.8867924528301888e-05, |
| "logits/chosen": -0.572998046875, |
| "logits/rejected": 1.1953125, |
| "logps/chosen": -578.25, |
| "logps/rejected": -951.5, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -33.890625, |
| "rewards/margins": 56.96875, |
| "rewards/rejected": -90.875, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 3.7756990031247874e-10, |
| "learning_rate": 1.509433962264151e-05, |
| "logits/chosen": -0.56298828125, |
| "logits/rejected": 1.040863037109375, |
| "logps/chosen": -521.5, |
| "logps/rejected": -947.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -30.6875, |
| "rewards/margins": 59.34375, |
| "rewards/rejected": -90.125, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.7401296070209917e-09, |
| "learning_rate": 1.1320754716981132e-05, |
| "logits/chosen": -0.534423828125, |
| "logits/rejected": 1.189453125, |
| "logps/chosen": -612.75, |
| "logps/rejected": -951.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -36.921875, |
| "rewards/margins": 53.96875, |
| "rewards/rejected": -90.9375, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 1.3595843194900355e-10, |
| "learning_rate": 7.547169811320755e-06, |
| "logits/chosen": -0.457275390625, |
| "logits/rejected": 1.19140625, |
| "logps/chosen": -684.5, |
| "logps/rejected": -951.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -40.59375, |
| "rewards/margins": 50.40625, |
| "rewards/rejected": -91.0, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 1.292032339993031e-10, |
| "learning_rate": 3.7735849056603773e-06, |
| "logits/chosen": -0.46923828125, |
| "logits/rejected": 1.1953125, |
| "logps/chosen": -576.0, |
| "logps/rejected": -950.5, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -33.90625, |
| "rewards/margins": 57.03125, |
| "rewards/rejected": -91.0, |
| "step": 62 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.292032339993031e-10, |
| "learning_rate": 0.0, |
| "logits/chosen": -0.58837890625, |
| "logits/rejected": 1.1875, |
| "logps/chosen": -526.5, |
| "logps/rejected": -951.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -32.609375, |
| "rewards/margins": 58.15625, |
| "rewards/rejected": -91.0, |
| "step": 63 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 63, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|