| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9962157048249763, |
| "eval_steps": 500, |
| "global_step": 162, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 191.35738502801618, |
| "learning_rate": 2.941176470588235e-08, |
| "logits/chosen": 4.0547356605529785, |
| "logits/rejected": 3.9409475326538086, |
| "logps/chosen": -31881.79296875, |
| "logps/pi_response": -18460.900390625, |
| "logps/ref_response": -18460.900390625, |
| "logps/rejected": -32645.6484375, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.06, |
| "eta": 0.0009999999310821295, |
| "grad_norm": 268.63004507915036, |
| "learning_rate": 2.941176470588235e-07, |
| "logits/chosen": 3.992137908935547, |
| "logits/rejected": 3.936304807662964, |
| "logps/chosen": -32467.201171875, |
| "logps/pi_response": -19245.36328125, |
| "logps/ref_response": -19250.427734375, |
| "logps/rejected": -32452.345703125, |
| "loss": 0.6758, |
| "rewards/accuracies": 0.26923078298568726, |
| "rewards/chosen": -0.5232506990432739, |
| "rewards/margins": 0.004823178984224796, |
| "rewards/rejected": -0.5280739068984985, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.12, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 292.31335814293095, |
| "learning_rate": 4.99472085783721e-07, |
| "logits/chosen": 3.628862142562866, |
| "logits/rejected": 3.504181146621704, |
| "logps/chosen": -33230.75, |
| "logps/pi_response": -17491.447265625, |
| "logps/ref_response": -17562.1328125, |
| "logps/rejected": -32794.78125, |
| "loss": 0.7567, |
| "rewards/accuracies": 0.5269230604171753, |
| "rewards/chosen": 0.06102239713072777, |
| "rewards/margins": 1.0328426361083984, |
| "rewards/rejected": -0.9718202948570251, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.18, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 325.0539602068029, |
| "learning_rate": 4.901488388458247e-07, |
| "logits/chosen": 3.735680103302002, |
| "logits/rejected": 3.570699453353882, |
| "logps/chosen": -31700.63671875, |
| "logps/pi_response": -18340.26953125, |
| "logps/ref_response": -18427.099609375, |
| "logps/rejected": -32393.19921875, |
| "loss": 0.7141, |
| "rewards/accuracies": 0.4769230782985687, |
| "rewards/chosen": 0.9040184617042542, |
| "rewards/margins": 1.0169713497161865, |
| "rewards/rejected": -0.11295279860496521, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.25, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 527.7409272120059, |
| "learning_rate": 4.695964991097616e-07, |
| "logits/chosen": 3.895160436630249, |
| "logits/rejected": 3.7793385982513428, |
| "logps/chosen": -32656.44921875, |
| "logps/pi_response": -18499.296875, |
| "logps/ref_response": -18572.958984375, |
| "logps/rejected": -32907.3125, |
| "loss": 0.6952, |
| "rewards/accuracies": 0.4769230782985687, |
| "rewards/chosen": 0.636614978313446, |
| "rewards/margins": 0.6588320732116699, |
| "rewards/rejected": -0.022217150777578354, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.31, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 253.1576881360588, |
| "learning_rate": 4.3877607113930516e-07, |
| "logits/chosen": 3.648803949356079, |
| "logits/rejected": 3.5239527225494385, |
| "logps/chosen": -32680.76953125, |
| "logps/pi_response": -17495.150390625, |
| "logps/ref_response": -17521.41015625, |
| "logps/rejected": -32909.08984375, |
| "loss": 0.6422, |
| "rewards/accuracies": 0.42692306637763977, |
| "rewards/chosen": -0.40684789419174194, |
| "rewards/margins": 0.6205180287361145, |
| "rewards/rejected": -1.027365803718567, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.37, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 320.74960989731557, |
| "learning_rate": 3.991286838919086e-07, |
| "logits/chosen": 3.655803918838501, |
| "logits/rejected": 3.5389180183410645, |
| "logps/chosen": -33417.07421875, |
| "logps/pi_response": -18680.818359375, |
| "logps/ref_response": -18699.423828125, |
| "logps/rejected": -32173.2578125, |
| "loss": 0.6291, |
| "rewards/accuracies": 0.5230769515037537, |
| "rewards/chosen": -0.396637499332428, |
| "rewards/margins": 0.7289350032806396, |
| "rewards/rejected": -1.1255724430084229, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.43, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 181.97105487380267, |
| "learning_rate": 3.52508205130354e-07, |
| "logits/chosen": 3.8402271270751953, |
| "logits/rejected": 3.7171554565429688, |
| "logps/chosen": -32983.04296875, |
| "logps/pi_response": -18114.55859375, |
| "logps/ref_response": -18124.4375, |
| "logps/rejected": -32464.29296875, |
| "loss": 0.6053, |
| "rewards/accuracies": 0.4615384638309479, |
| "rewards/chosen": -0.46990343928337097, |
| "rewards/margins": 0.38820669054985046, |
| "rewards/rejected": -0.858110249042511, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.49, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 152.1361225175544, |
| "learning_rate": 3.010945566265912e-07, |
| "logits/chosen": 3.8307697772979736, |
| "logits/rejected": 3.6418731212615967, |
| "logps/chosen": -33045.02734375, |
| "logps/pi_response": -18411.818359375, |
| "logps/ref_response": -18399.951171875, |
| "logps/rejected": -32295.091796875, |
| "loss": 0.5919, |
| "rewards/accuracies": 0.557692289352417, |
| "rewards/chosen": -0.7943554520606995, |
| "rewards/margins": 0.64806067943573, |
| "rewards/rejected": -1.4424160718917847, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.55, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 254.93088711947533, |
| "learning_rate": 2.4729178344249006e-07, |
| "logits/chosen": 3.744925022125244, |
| "logits/rejected": 3.602116107940674, |
| "logps/chosen": -32183.412109375, |
| "logps/pi_response": -18219.287109375, |
| "logps/ref_response": -18195.8125, |
| "logps/rejected": -32723.04296875, |
| "loss": 0.5946, |
| "rewards/accuracies": 0.5307692289352417, |
| "rewards/chosen": -0.8943226933479309, |
| "rewards/margins": 0.6636541485786438, |
| "rewards/rejected": -1.5579768419265747, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.61, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 155.63742344555874, |
| "learning_rate": 1.9361564345465145e-07, |
| "logits/chosen": 3.838331460952759, |
| "logits/rejected": 3.709167003631592, |
| "logps/chosen": -33436.125, |
| "logps/pi_response": -16940.001953125, |
| "logps/ref_response": -16922.8671875, |
| "logps/rejected": -32356.5234375, |
| "loss": 0.6018, |
| "rewards/accuracies": 0.48846152424812317, |
| "rewards/chosen": -0.7997922301292419, |
| "rewards/margins": 0.4609057307243347, |
| "rewards/rejected": -1.2606979608535767, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.68, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 89.01442356583244, |
| "learning_rate": 1.4257597331216208e-07, |
| "logits/chosen": 3.98964524269104, |
| "logits/rejected": 3.8796193599700928, |
| "logps/chosen": -32653.2890625, |
| "logps/pi_response": -17906.2265625, |
| "logps/ref_response": -17896.880859375, |
| "logps/rejected": -32700.134765625, |
| "loss": 0.6, |
| "rewards/accuracies": 0.4923076927661896, |
| "rewards/chosen": -0.20051293075084686, |
| "rewards/margins": 0.46511974930763245, |
| "rewards/rejected": -0.6656327247619629, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.74, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 83.3705719230307, |
| "learning_rate": 9.655933126436563e-08, |
| "logits/chosen": 3.9262661933898926, |
| "logits/rejected": 3.8268930912017822, |
| "logps/chosen": -32867.8359375, |
| "logps/pi_response": -17454.138671875, |
| "logps/ref_response": -17433.240234375, |
| "logps/rejected": -32855.66015625, |
| "loss": 0.5881, |
| "rewards/accuracies": 0.5192307829856873, |
| "rewards/chosen": -0.674457311630249, |
| "rewards/margins": 0.4932273328304291, |
| "rewards/rejected": -1.167684555053711, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.8, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 74.20493318211953, |
| "learning_rate": 5.771740434959277e-08, |
| "logits/chosen": 3.9484238624572754, |
| "logits/rejected": 3.8283846378326416, |
| "logps/chosen": -33543.33984375, |
| "logps/pi_response": -18303.177734375, |
| "logps/ref_response": -18285.470703125, |
| "logps/rejected": -32467.873046875, |
| "loss": 0.5825, |
| "rewards/accuracies": 0.5384615659713745, |
| "rewards/chosen": -0.9354388117790222, |
| "rewards/margins": 0.5288434624671936, |
| "rewards/rejected": -1.4642821550369263, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.86, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 214.3061926146843, |
| "learning_rate": 2.7866397900677185e-08, |
| "logits/chosen": 3.9201173782348633, |
| "logits/rejected": 3.743128538131714, |
| "logps/chosen": -32812.75390625, |
| "logps/pi_response": -17746.27734375, |
| "logps/ref_response": -17724.193359375, |
| "logps/rejected": -32488.669921875, |
| "loss": 0.5732, |
| "rewards/accuracies": 0.5461538434028625, |
| "rewards/chosen": -1.3605297803878784, |
| "rewards/margins": 0.7671653032302856, |
| "rewards/rejected": -2.127694845199585, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.92, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 233.45352677814805, |
| "learning_rate": 8.402111802159412e-09, |
| "logits/chosen": 3.8903307914733887, |
| "logits/rejected": 3.79535174369812, |
| "logps/chosen": -32934.5390625, |
| "logps/pi_response": -18709.8828125, |
| "logps/ref_response": -18687.2265625, |
| "logps/rejected": -33072.52734375, |
| "loss": 0.5728, |
| "rewards/accuracies": 0.48076921701431274, |
| "rewards/chosen": -0.8628613948822021, |
| "rewards/margins": 0.49873629212379456, |
| "rewards/rejected": -1.3615975379943848, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.98, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 86.80955639874749, |
| "learning_rate": 2.3467443900582197e-10, |
| "logits/chosen": 3.8695499897003174, |
| "logits/rejected": 3.724947929382324, |
| "logps/chosen": -32277.80859375, |
| "logps/pi_response": -15952.685546875, |
| "logps/ref_response": -15933.2509765625, |
| "logps/rejected": -32813.515625, |
| "loss": 0.5703, |
| "rewards/accuracies": 0.5384615659713745, |
| "rewards/chosen": -0.6783939003944397, |
| "rewards/margins": 0.6465076208114624, |
| "rewards/rejected": -1.3249014616012573, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 162, |
| "total_flos": 0.0, |
| "train_loss": 0.6241708625981837, |
| "train_runtime": 25355.9873, |
| "train_samples_per_second": 0.834, |
| "train_steps_per_second": 0.006 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 162, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 200, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|