Invalid JSON: Unexpected token 'N', ..."/chosen": NaN,
"... is not valid JSON
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9499796665311102, | |
| "eval_steps": 500, | |
| "global_step": 600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.032533550223668155, | |
| "grad_norm": 103.71580505371094, | |
| "kl": 0.0, | |
| "learning_rate": 6.493506493506494e-09, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -158.7671875, | |
| "logps/rejected": -235.8359375, | |
| "loss": 0.5, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.06506710044733631, | |
| "grad_norm": 94.47801208496094, | |
| "kl": 0.05097656324505806, | |
| "learning_rate": 2.1103896103896103e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -158.2953125, | |
| "logps/rejected": -232.075, | |
| "loss": 0.498, | |
| "rewards/chosen": 0.009334802627563477, | |
| "rewards/margins": 0.012342309951782227, | |
| "rewards/rejected": -0.00300750732421875, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.09760065067100447, | |
| "grad_norm": 143.15447998046875, | |
| "kl": 0.592968761920929, | |
| "learning_rate": 3.733766233766234e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -156.840625, | |
| "logps/rejected": -231.65, | |
| "loss": 0.471, | |
| "rewards/chosen": 0.14929332733154296, | |
| "rewards/margins": 0.1863230228424072, | |
| "rewards/rejected": -0.037029695510864255, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.13013420089467262, | |
| "grad_norm": 81.36482238769531, | |
| "kl": 1.6533203125, | |
| "learning_rate": 5.3571428571428564e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -151.7375, | |
| "logps/rejected": -231.0375, | |
| "loss": 0.3677, | |
| "rewards/chosen": 0.6113525390625, | |
| "rewards/margins": 0.9034156799316406, | |
| "rewards/rejected": -0.29206314086914065, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.16266775111834078, | |
| "grad_norm": 76.27654266357422, | |
| "kl": 0.14501953125, | |
| "learning_rate": 6.98051948051948e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -141.165625, | |
| "logps/rejected": -251.2015625, | |
| "loss": 0.1578, | |
| "rewards/chosen": 1.392755126953125, | |
| "rewards/margins": 2.920855712890625, | |
| "rewards/rejected": -1.5281005859375, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.19520130134200894, | |
| "grad_norm": 3.925260066986084, | |
| "kl": 0.0025390624068677425, | |
| "learning_rate": 8.603896103896104e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -127.53515625, | |
| "logps/rejected": -274.090625, | |
| "loss": 0.0543, | |
| "rewards/chosen": 2.5130126953125, | |
| "rewards/margins": 5.8057373046875, | |
| "rewards/rejected": -3.292724609375, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2277348515656771, | |
| "grad_norm": 0.3158203661441803, | |
| "kl": 0.0, | |
| "learning_rate": 1.0227272727272728e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -100.13359375, | |
| "logps/rejected": -305.453125, | |
| "loss": 0.0025, | |
| "rewards/chosen": 4.975732421875, | |
| "rewards/margins": 10.637548828124999, | |
| "rewards/rejected": -5.66181640625, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.26026840178934524, | |
| "grad_norm": 0.01107876282185316, | |
| "kl": 0.0, | |
| "learning_rate": 1.1850649350649349e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -75.421875, | |
| "logps/rejected": -338.096875, | |
| "loss": 0.0007, | |
| "rewards/chosen": 6.189892578125, | |
| "rewards/margins": 13.384521484375, | |
| "rewards/rejected": -7.19462890625, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2928019520130134, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.3474025974025975e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -56.57421875, | |
| "logps/rejected": -360.18125, | |
| "loss": 0.0015, | |
| "rewards/chosen": 8.42177734375, | |
| "rewards/margins": 18.254101562499997, | |
| "rewards/rejected": -9.83232421875, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.32533550223668156, | |
| "grad_norm": 0.0, | |
| "kl": 0.15000000596046448, | |
| "learning_rate": 1.5097402597402597e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -49.366015625, | |
| "logps/rejected": -357.228125, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.46904296875, | |
| "rewards/margins": 17.947460937499997, | |
| "rewards/rejected": -9.47841796875, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3578690524603497, | |
| "grad_norm": 0.0, | |
| "kl": 0.26777344942092896, | |
| "learning_rate": 1.6720779220779217e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.25625, | |
| "logps/rejected": -355.371875, | |
| "loss": 0.0, | |
| "rewards/chosen": 9.096875, | |
| "rewards/margins": 19.25791015625, | |
| "rewards/rejected": -10.16103515625, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.3904026026840179, | |
| "grad_norm": 0.0, | |
| "kl": 0.934863269329071, | |
| "learning_rate": 1.8344155844155843e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -47.00859375, | |
| "logps/rejected": -353.48125, | |
| "loss": 0.0002, | |
| "rewards/chosen": 8.9310546875, | |
| "rewards/margins": 17.91005859375, | |
| "rewards/rejected": -8.97900390625, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.42293615290768605, | |
| "grad_norm": 0.029346637427806854, | |
| "kl": 3.103515625, | |
| "learning_rate": 1.9967532467532466e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -46.458203125, | |
| "logps/rejected": -350.59375, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.2421875, | |
| "rewards/margins": 17.04013671875, | |
| "rewards/rejected": -8.79794921875, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.4554697031313542, | |
| "grad_norm": 0.0, | |
| "kl": 0.69140625, | |
| "learning_rate": 2.159090909090909e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -47.403125, | |
| "logps/rejected": -356.59375, | |
| "loss": 0.0001, | |
| "rewards/chosen": 8.01259765625, | |
| "rewards/margins": 18.42900390625, | |
| "rewards/rejected": -10.41640625, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.48800325335502237, | |
| "grad_norm": 0.0, | |
| "kl": 0.16386719048023224, | |
| "learning_rate": 2.3214285714285714e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -47.1765625, | |
| "logps/rejected": -367.81875, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.698828125, | |
| "rewards/margins": 18.972265625, | |
| "rewards/rejected": -10.2734375, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.5205368035786905, | |
| "grad_norm": 0.0938071757555008, | |
| "kl": 3.0655274391174316, | |
| "learning_rate": 2.483766233766234e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -47.46953125, | |
| "logps/rejected": -350.990625, | |
| "loss": 0.0001, | |
| "rewards/chosen": 8.87724609375, | |
| "rewards/margins": 17.825390624999997, | |
| "rewards/rejected": -8.94814453125, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.5530703538023587, | |
| "grad_norm": 0.0, | |
| "kl": 0.19960936903953552, | |
| "learning_rate": 2.6461038961038964e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.409765625, | |
| "logps/rejected": -372.10625, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.11689453125, | |
| "rewards/margins": 19.501953125, | |
| "rewards/rejected": -11.38505859375, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.5856039040260268, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.8084415584415584e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -49.136328125, | |
| "logps/rejected": -387.340625, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.95927734375, | |
| "rewards/margins": 20.40771484375, | |
| "rewards/rejected": -11.4484375, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.618137454249695, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.9707792207792204e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -50.2359375, | |
| "logps/rejected": -391.128125, | |
| "loss": 0.0001, | |
| "rewards/chosen": 8.42666015625, | |
| "rewards/margins": 20.3193359375, | |
| "rewards/rejected": -11.89267578125, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.6506710044733631, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.133116883116883e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -49.67421875, | |
| "logps/rejected": -390.3875, | |
| "loss": 0.0001, | |
| "rewards/chosen": 8.5646484375, | |
| "rewards/margins": 20.190722656250003, | |
| "rewards/rejected": -11.62607421875, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.6832045546970313, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.295454545454545e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -50.19453125, | |
| "logps/rejected": -393.453125, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.9052734375, | |
| "rewards/margins": 21.5578125, | |
| "rewards/rejected": -12.6525390625, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.7157381049206994, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.457792207792208e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -49.93125, | |
| "logps/rejected": -401.3375, | |
| "loss": 0.0, | |
| "rewards/chosen": 9.0279296875, | |
| "rewards/margins": 22.509765625, | |
| "rewards/rejected": -13.4818359375, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.7482716551443677, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.62012987012987e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -50.351171875, | |
| "logps/rejected": -399.60625, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.4427734375, | |
| "rewards/margins": 22.73662109375, | |
| "rewards/rejected": -14.29384765625, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.7808052053680358, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.782467532467532e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -50.937109375, | |
| "logps/rejected": -398.54375, | |
| "loss": 0.0, | |
| "rewards/chosen": 9.748828125, | |
| "rewards/margins": 22.75693359375, | |
| "rewards/rejected": -13.00810546875, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.813338755591704, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.9448051948051946e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -50.4765625, | |
| "logps/rejected": -406.259375, | |
| "loss": 0.0, | |
| "rewards/chosen": 7.9296875, | |
| "rewards/margins": 21.64990234375, | |
| "rewards/rejected": -13.72021484375, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.8458723058153721, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.1071428571428566e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -50.065234375, | |
| "logps/rejected": -397.978125, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.79658203125, | |
| "rewards/margins": 22.427539062500003, | |
| "rewards/rejected": -13.63095703125, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.8784058560390403, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.2694805194805197e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.828515625, | |
| "logps/rejected": -395.05, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.353515625, | |
| "rewards/margins": 19.6958984375, | |
| "rewards/rejected": -11.3423828125, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.9109394062627084, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.4318181818181817e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -49.0359375, | |
| "logps/rejected": -401.634375, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.53701171875, | |
| "rewards/margins": 20.25283203125, | |
| "rewards/rejected": -11.7158203125, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.9434729564863765, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.5941558441558437e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -49.019140625, | |
| "logps/rejected": -393.425, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.6474609375, | |
| "rewards/margins": 20.7501953125, | |
| "rewards/rejected": -12.102734375, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.9760065067100447, | |
| "grad_norm": 0.016830716282129288, | |
| "kl": 0.0, | |
| "learning_rate": 4.756493506493506e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -49.725, | |
| "logps/rejected": -394.653125, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.255859375, | |
| "rewards/margins": 21.61572265625, | |
| "rewards/rejected": -13.35986328125, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.0065067100447336, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.918831168831168e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.37416666666667, | |
| "logps/rejected": -396.87, | |
| "loss": 0.0, | |
| "rewards/chosen": 7.7615625, | |
| "rewards/margins": 19.706875, | |
| "rewards/rejected": -11.9453125, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.0390402602684017, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.999959861406242e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.408203125, | |
| "logps/rejected": -403.834375, | |
| "loss": 0.0, | |
| "rewards/chosen": 9.22265625, | |
| "rewards/margins": 22.9748046875, | |
| "rewards/rejected": -13.7521484375, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.07157381049207, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.999638760389452e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.765625, | |
| "logps/rejected": -396.6625, | |
| "loss": 0.0, | |
| "rewards/chosen": 9.08193359375, | |
| "rewards/margins": 19.86611328125, | |
| "rewards/rejected": -10.7841796875, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.1041073607157381, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.998996599598879e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.26953125, | |
| "logps/rejected": -393.775, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.9400390625, | |
| "rewards/margins": 20.629199218750003, | |
| "rewards/rejected": -11.68916015625, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.1366409109394062, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.998033461515242e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.61484375, | |
| "logps/rejected": -396.7875, | |
| "loss": 0.0, | |
| "rewards/chosen": 9.119140625, | |
| "rewards/margins": 22.4150390625, | |
| "rewards/rejected": -13.2958984375, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.1691744611630743, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.996749469846372e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.3609375, | |
| "logps/rejected": -396.115625, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.59248046875, | |
| "rewards/margins": 22.016796875, | |
| "rewards/rejected": -13.42431640625, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.2017080113867427, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.995144789511329e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.6953125, | |
| "logps/rejected": -401.90625, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.97890625, | |
| "rewards/margins": 22.325878906249997, | |
| "rewards/rejected": -13.34697265625, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.2342415616104108, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.993219626619219e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.890625, | |
| "logps/rejected": -401.4625, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.95244140625, | |
| "rewards/margins": 22.37314453125, | |
| "rewards/rejected": -13.420703125, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.2667751118340789, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.990974228442717e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.7734375, | |
| "logps/rejected": -403.28125, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.423828125, | |
| "rewards/margins": 20.5177734375, | |
| "rewards/rejected": -12.0939453125, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.299308662057747, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.988408883386307e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.671875, | |
| "logps/rejected": -399.890625, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.93525390625, | |
| "rewards/margins": 21.9240234375, | |
| "rewards/rejected": -12.98876953125, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.331842212281415, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.985523920949242e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.679296875, | |
| "logps/rejected": -402.096875, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.42080078125, | |
| "rewards/margins": 20.993359374999997, | |
| "rewards/rejected": -12.57255859375, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.3643757625050834, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.982319711683221e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -49.55546875, | |
| "logps/rejected": -395.29375, | |
| "loss": 0.0, | |
| "rewards/chosen": 9.683203125, | |
| "rewards/margins": 22.695703125, | |
| "rewards/rejected": -13.0125, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.3969093127287515, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.978796667144791e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.65859375, | |
| "logps/rejected": -403.265625, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.24130859375, | |
| "rewards/margins": 21.69130859375, | |
| "rewards/rejected": -13.45, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.4294428629524196, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.974955239842493e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -49.275390625, | |
| "logps/rejected": -410.9125, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.18759765625, | |
| "rewards/margins": 21.62451171875, | |
| "rewards/rejected": -13.4369140625, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.461976413176088, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.970795923178733e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -49.339453125, | |
| "logps/rejected": -405.471875, | |
| "loss": 0.0, | |
| "rewards/chosen": 7.85322265625, | |
| "rewards/margins": 21.57744140625, | |
| "rewards/rejected": -13.72421875, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.494509963399756, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.966319251386412e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -49.5921875, | |
| "logps/rejected": -401.665625, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.530078125, | |
| "rewards/margins": 21.6490234375, | |
| "rewards/rejected": -13.1189453125, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.5270435136234242, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.961525799460308e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -49.540625, | |
| "logps/rejected": -408.703125, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.6064453125, | |
| "rewards/margins": 22.0228515625, | |
| "rewards/rejected": -13.41640625, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.5595770638470923, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.956416183083221e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.923046875, | |
| "logps/rejected": -407.4, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.303515625, | |
| "rewards/margins": 23.2646484375, | |
| "rewards/rejected": -14.9611328125, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.5921106140707604, | |
| "grad_norm": 0.0061869011260569096, | |
| "kl": 0.0, | |
| "learning_rate": 4.950991058546892e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.971484375, | |
| "logps/rejected": -404.6, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.97802734375, | |
| "rewards/margins": 21.27333984375, | |
| "rewards/rejected": -12.2953125, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.6246441642944287, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.945251122667714e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.584765625, | |
| "logps/rejected": -411.175, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.23486328125, | |
| "rewards/margins": 21.76025390625, | |
| "rewards/rejected": -13.525390625, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.6571777145180968, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.93919711269722e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.616015625, | |
| "logps/rejected": -403.053125, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.54775390625, | |
| "rewards/margins": 21.637109375, | |
| "rewards/rejected": -13.08935546875, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.689711264741765, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.932829806227398e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.595703125, | |
| "logps/rejected": -401.678125, | |
| "loss": 0.0, | |
| "rewards/chosen": 9.25576171875, | |
| "rewards/margins": 22.1841796875, | |
| "rewards/rejected": -12.92841796875, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.7222448149654332, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.926150021090812e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.584765625, | |
| "logps/rejected": -408.21875, | |
| "loss": 0.0, | |
| "rewards/chosen": 9.191796875, | |
| "rewards/margins": 23.53251953125, | |
| "rewards/rejected": -14.34072265625, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.7547783651891011, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.919158615255555e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.476953125, | |
| "logps/rejected": -402.609375, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.7083984375, | |
| "rewards/margins": 22.474316406249997, | |
| "rewards/rejected": -13.76591796875, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.7873119154127695, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.911856486715056e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -49.780078125, | |
| "logps/rejected": -407.21875, | |
| "loss": 0.0, | |
| "rewards/chosen": 9.62373046875, | |
| "rewards/margins": 23.37060546875, | |
| "rewards/rejected": -13.746875, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.8198454656364376, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.904244573372733e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -49.086328125, | |
| "logps/rejected": -407.178125, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.03115234375, | |
| "rewards/margins": 22.18642578125, | |
| "rewards/rejected": -14.1552734375, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.8523790158601057, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.896323852921527e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -49.037890625, | |
| "logps/rejected": -406.178125, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.99189453125, | |
| "rewards/margins": 22.690234375000003, | |
| "rewards/rejected": -13.69833984375, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.884912566083774, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.888095342718329e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.119140625, | |
| "logps/rejected": -405.759375, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.451171875, | |
| "rewards/margins": 21.19541015625, | |
| "rewards/rejected": -12.74423828125, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.9174461163074419, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.879560099653306e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.169921875, | |
| "logps/rejected": -406.915625, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.71015625, | |
| "rewards/margins": 20.17109375, | |
| "rewards/rejected": -11.4609375, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.9499796665311102, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.870719220014149e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -48.301953125, | |
| "logps/rejected": -401.065625, | |
| "loss": 0.0, | |
| "rewards/chosen": 8.30107421875, | |
| "rewards/margins": 21.400097656249997, | |
| "rewards/rejected": -13.0990234375, | |
| "step": 600 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3080, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |