| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9968652037617555, |
| "eval_steps": 500, |
| "global_step": 159, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.125e-08, |
| "logits/chosen": -2.072277545928955, |
| "logits/rejected": -2.0595779418945312, |
| "logps/chosen": -398.8112487792969, |
| "logps/pi_response": -311.4126892089844, |
| "logps/ref_response": -311.4126892089844, |
| "logps/rejected": -484.30792236328125, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 3.1249999999999997e-07, |
| "logits/chosen": -2.0879757404327393, |
| "logits/rejected": -1.9906916618347168, |
| "logps/chosen": -323.0065612792969, |
| "logps/pi_response": -217.33697509765625, |
| "logps/ref_response": -217.61642456054688, |
| "logps/rejected": -381.7716979980469, |
| "loss": 0.6868, |
| "rewards/accuracies": 0.4791666567325592, |
| "rewards/chosen": -0.005266552325338125, |
| "rewards/margins": 0.0116809643805027, |
| "rewards/rejected": -0.016947515308856964, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.990353313429303e-07, |
| "logits/chosen": -1.9731781482696533, |
| "logits/rejected": -1.9694221019744873, |
| "logps/chosen": -309.8916931152344, |
| "logps/pi_response": -223.656005859375, |
| "logps/ref_response": -227.64566040039062, |
| "logps/rejected": -454.9925231933594, |
| "loss": 0.6618, |
| "rewards/accuracies": 0.581250011920929, |
| "rewards/chosen": -0.1710830181837082, |
| "rewards/margins": 0.27294591069221497, |
| "rewards/rejected": -0.4440288543701172, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.882681251368548e-07, |
| "logits/chosen": -1.9831979274749756, |
| "logits/rejected": -1.925021767616272, |
| "logps/chosen": -335.2173767089844, |
| "logps/pi_response": -229.2874755859375, |
| "logps/ref_response": -233.17965698242188, |
| "logps/rejected": -407.2176818847656, |
| "loss": 0.6719, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.14075350761413574, |
| "rewards/margins": 0.23552139103412628, |
| "rewards/rejected": -0.37627488374710083, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.6604720940421207e-07, |
| "logits/chosen": -2.0499300956726074, |
| "logits/rejected": -1.9712011814117432, |
| "logps/chosen": -340.423095703125, |
| "logps/pi_response": -247.22396850585938, |
| "logps/ref_response": -226.72451782226562, |
| "logps/rejected": -480.654296875, |
| "loss": 0.6482, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.30246642231941223, |
| "rewards/margins": 0.3026350140571594, |
| "rewards/rejected": -0.605101466178894, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.3344075855595097e-07, |
| "logits/chosen": -2.1223251819610596, |
| "logits/rejected": -1.982454538345337, |
| "logps/chosen": -344.250732421875, |
| "logps/pi_response": -275.4682312011719, |
| "logps/ref_response": -248.96499633789062, |
| "logps/rejected": -547.07177734375, |
| "loss": 0.6326, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.29991382360458374, |
| "rewards/margins": 0.4313625395298004, |
| "rewards/rejected": -0.7312763929367065, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 3.920161866827889e-07, |
| "logits/chosen": -1.8565187454223633, |
| "logits/rejected": -1.7435777187347412, |
| "logps/chosen": -347.46746826171875, |
| "logps/pi_response": -278.3968811035156, |
| "logps/ref_response": -236.6101531982422, |
| "logps/rejected": -519.2654418945312, |
| "loss": 0.6007, |
| "rewards/accuracies": 0.668749988079071, |
| "rewards/chosen": -0.46309342980384827, |
| "rewards/margins": 0.5743097066879272, |
| "rewards/rejected": -1.0374032258987427, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 3.4376480090239047e-07, |
| "logits/chosen": -1.473629355430603, |
| "logits/rejected": -1.2030349969863892, |
| "logps/chosen": -375.29510498046875, |
| "logps/pi_response": -285.02398681640625, |
| "logps/ref_response": -218.2779998779297, |
| "logps/rejected": -519.3750610351562, |
| "loss": 0.6111, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.7290376424789429, |
| "rewards/margins": 0.5281031727790833, |
| "rewards/rejected": -1.257140874862671, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 2.910060778827554e-07, |
| "logits/chosen": -1.3979953527450562, |
| "logits/rejected": -1.066356897354126, |
| "logps/chosen": -396.0669250488281, |
| "logps/pi_response": -294.2704162597656, |
| "logps/ref_response": -234.7672119140625, |
| "logps/rejected": -505.8048400878906, |
| "loss": 0.573, |
| "rewards/accuracies": 0.6312500238418579, |
| "rewards/chosen": -0.6167668104171753, |
| "rewards/margins": 0.3895764946937561, |
| "rewards/rejected": -1.0063434839248657, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 2.3627616503391812e-07, |
| "logits/chosen": -0.893900990486145, |
| "logits/rejected": -0.3373408913612366, |
| "logps/chosen": -409.7942810058594, |
| "logps/pi_response": -316.0831604003906, |
| "logps/ref_response": -232.30178833007812, |
| "logps/rejected": -567.0125122070312, |
| "loss": 0.5714, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.9255668520927429, |
| "rewards/margins": 0.6182758808135986, |
| "rewards/rejected": -1.5438427925109863, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 1.8220596619089573e-07, |
| "logits/chosen": -0.8694812059402466, |
| "logits/rejected": -0.5924743413925171, |
| "logps/chosen": -428.4925231933594, |
| "logps/pi_response": -329.4599304199219, |
| "logps/ref_response": -245.4618682861328, |
| "logps/rejected": -537.4727783203125, |
| "loss": 0.5734, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.9432543516159058, |
| "rewards/margins": 0.4144589900970459, |
| "rewards/rejected": -1.357713222503662, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.3139467229135998e-07, |
| "logits/chosen": -0.9938161969184875, |
| "logits/rejected": -0.45242977142333984, |
| "logps/chosen": -389.0281677246094, |
| "logps/pi_response": -314.51422119140625, |
| "logps/ref_response": -233.9892120361328, |
| "logps/rejected": -560.4173583984375, |
| "loss": 0.5906, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.7948281168937683, |
| "rewards/margins": 0.5036167502403259, |
| "rewards/rejected": -1.2984448671340942, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 8.628481651367875e-08, |
| "logits/chosen": -0.982222855091095, |
| "logits/rejected": -0.48237520456314087, |
| "logps/chosen": -397.54718017578125, |
| "logps/pi_response": -313.84527587890625, |
| "logps/ref_response": -230.01510620117188, |
| "logps/rejected": -565.9222412109375, |
| "loss": 0.5429, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.8856877088546753, |
| "rewards/margins": 0.6042315363883972, |
| "rewards/rejected": -1.4899194240570068, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 4.904486005914027e-08, |
| "logits/chosen": -0.8022481203079224, |
| "logits/rejected": -0.40596532821655273, |
| "logps/chosen": -407.4441833496094, |
| "logps/pi_response": -319.7482604980469, |
| "logps/ref_response": -227.49935913085938, |
| "logps/rejected": -588.3087768554688, |
| "loss": 0.5733, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.9994068145751953, |
| "rewards/margins": 0.6314084529876709, |
| "rewards/rejected": -1.6308151483535767, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 2.1464952759020856e-08, |
| "logits/chosen": -0.6527538299560547, |
| "logits/rejected": -0.13006380200386047, |
| "logps/chosen": -432.28009033203125, |
| "logps/pi_response": -334.286376953125, |
| "logps/ref_response": -231.6268310546875, |
| "logps/rejected": -581.1611328125, |
| "loss": 0.5507, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.0939096212387085, |
| "rewards/margins": 0.6699361801147461, |
| "rewards/rejected": -1.7638458013534546, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 4.8708793644441086e-09, |
| "logits/chosen": -0.9013077616691589, |
| "logits/rejected": -0.350179523229599, |
| "logps/chosen": -417.94500732421875, |
| "logps/pi_response": -348.1318359375, |
| "logps/ref_response": -250.2039337158203, |
| "logps/rejected": -609.0900268554688, |
| "loss": 0.5658, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.9682818651199341, |
| "rewards/margins": 0.6685720086097717, |
| "rewards/rejected": -1.636853814125061, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 159, |
| "total_flos": 0.0, |
| "train_loss": 0.596127321135323, |
| "train_runtime": 4548.9667, |
| "train_samples_per_second": 4.48, |
| "train_steps_per_second": 0.035 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 159, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|