| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.998324958123953, |
| "eval_steps": 100, |
| "global_step": 149, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.3333333333333335e-07, |
| "logits/chosen": -2.491262435913086, |
| "logits/rejected": -2.5593011379241943, |
| "logps/chosen": -151.13595581054688, |
| "logps/rejected": -176.25180053710938, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 3.3333333333333333e-06, |
| "logits/chosen": -2.657362699508667, |
| "logits/rejected": -2.6224162578582764, |
| "logps/chosen": -177.82272338867188, |
| "logps/rejected": -179.98106384277344, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.4930555522441864, |
| "rewards/chosen": -0.013637593947350979, |
| "rewards/margins": 0.004961313679814339, |
| "rewards/rejected": -0.018598908558487892, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.982842942906386e-06, |
| "logits/chosen": -2.702573299407959, |
| "logits/rejected": -2.6558778285980225, |
| "logps/chosen": -193.16390991210938, |
| "logps/rejected": -188.91493225097656, |
| "loss": 0.694, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.10607340186834335, |
| "rewards/margins": 0.0018767903093248606, |
| "rewards/rejected": -0.10795019567012787, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.846996204000967e-06, |
| "logits/chosen": -2.6284031867980957, |
| "logits/rejected": -2.585899829864502, |
| "logps/chosen": -186.4259490966797, |
| "logps/rejected": -187.31175231933594, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.5406249761581421, |
| "rewards/chosen": -0.0701591819524765, |
| "rewards/margins": 0.008776131086051464, |
| "rewards/rejected": -0.07893531024456024, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.582735470385229e-06, |
| "logits/chosen": -2.6113476753234863, |
| "logits/rejected": -2.6379783153533936, |
| "logps/chosen": -169.52566528320312, |
| "logps/rejected": -175.36927795410156, |
| "loss": 0.6898, |
| "rewards/accuracies": 0.528124988079071, |
| "rewards/chosen": 0.0019457591697573662, |
| "rewards/margins": 0.011439744383096695, |
| "rewards/rejected": -0.009493985213339329, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.204519553876095e-06, |
| "logits/chosen": -2.6095616817474365, |
| "logits/rejected": -2.5940325260162354, |
| "logps/chosen": -160.98004150390625, |
| "logps/rejected": -164.4163360595703, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": 0.015476897358894348, |
| "rewards/margins": 0.006927810609340668, |
| "rewards/rejected": 0.008549087680876255, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 3.7330422317447686e-06, |
| "logits/chosen": -2.662424087524414, |
| "logits/rejected": -2.6424801349639893, |
| "logps/chosen": -180.03500366210938, |
| "logps/rejected": -174.4924774169922, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": 0.0384332574903965, |
| "rewards/margins": 0.005103477276861668, |
| "rewards/rejected": 0.03332977741956711, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 3.1941000034687516e-06, |
| "logits/chosen": -2.6400465965270996, |
| "logits/rejected": -2.6263554096221924, |
| "logps/chosen": -167.74423217773438, |
| "logps/rejected": -171.72691345214844, |
| "loss": 0.6919, |
| "rewards/accuracies": 0.5562499761581421, |
| "rewards/chosen": -0.05802411586046219, |
| "rewards/margins": 0.008442241698503494, |
| "rewards/rejected": -0.06646636128425598, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 2.6171806561748503e-06, |
| "logits/chosen": -2.6121268272399902, |
| "logits/rejected": -2.5906381607055664, |
| "logps/chosen": -180.24801635742188, |
| "logps/rejected": -179.08216857910156, |
| "loss": 0.6883, |
| "rewards/accuracies": 0.534375011920929, |
| "rewards/chosen": -0.025833597406744957, |
| "rewards/margins": 0.0072801136411726475, |
| "rewards/rejected": -0.03311371058225632, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 2.0338498642707977e-06, |
| "logits/chosen": -2.663304090499878, |
| "logits/rejected": -2.61620831489563, |
| "logps/chosen": -175.11361694335938, |
| "logps/rejected": -171.87161254882812, |
| "loss": 0.6862, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.03003253974020481, |
| "rewards/margins": 0.019972536712884903, |
| "rewards/rejected": -0.05000507831573486, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 1.4760240991587338e-06, |
| "logits/chosen": -2.612849712371826, |
| "logits/rejected": -2.590980052947998, |
| "logps/chosen": -158.46450805664062, |
| "logps/rejected": -165.57606506347656, |
| "loss": 0.686, |
| "rewards/accuracies": 0.596875011920929, |
| "rewards/chosen": -0.00229537021368742, |
| "rewards/margins": 0.028590286150574684, |
| "rewards/rejected": -0.03088565543293953, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.67, |
| "eval_logits/chosen": -2.5771172046661377, |
| "eval_logits/rejected": -2.4848170280456543, |
| "eval_logps/chosen": -307.7010192871094, |
| "eval_logps/rejected": -301.19329833984375, |
| "eval_loss": 0.683651864528656, |
| "eval_rewards/accuracies": 0.5720000267028809, |
| "eval_rewards/chosen": -0.09298302233219147, |
| "eval_rewards/margins": 0.020600860938429832, |
| "eval_rewards/rejected": -0.11358388513326645, |
| "eval_runtime": 382.0082, |
| "eval_samples_per_second": 5.235, |
| "eval_steps_per_second": 0.654, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 9.742243453755202e-07, |
| "logits/chosen": -2.6324477195739746, |
| "logits/rejected": -2.6193971633911133, |
| "logps/chosen": -169.78958129882812, |
| "logps/rejected": -178.68710327148438, |
| "loss": 0.69, |
| "rewards/accuracies": 0.528124988079071, |
| "rewards/chosen": -0.028186390176415443, |
| "rewards/margins": 0.006812813691794872, |
| "rewards/rejected": -0.03499920293688774, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 5.559061696656199e-07, |
| "logits/chosen": -2.6105129718780518, |
| "logits/rejected": -2.5832369327545166, |
| "logps/chosen": -177.0843505859375, |
| "logps/rejected": -179.75782775878906, |
| "loss": 0.6852, |
| "rewards/accuracies": 0.5531250238418579, |
| "rewards/chosen": -0.021660596132278442, |
| "rewards/margins": 0.016608651727437973, |
| "rewards/rejected": -0.038269251585006714, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 2.4395751190352924e-07, |
| "logits/chosen": -2.590940475463867, |
| "logits/rejected": -2.5727646350860596, |
| "logps/chosen": -174.79104614257812, |
| "logps/rejected": -179.25521850585938, |
| "loss": 0.687, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": -0.03851151093840599, |
| "rewards/margins": 0.011401178315281868, |
| "rewards/rejected": -0.04991268739104271, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 5.544639001763719e-08, |
| "logits/chosen": -2.6073455810546875, |
| "logits/rejected": -2.6085193157196045, |
| "logps/chosen": -189.36952209472656, |
| "logps/rejected": -190.8089141845703, |
| "loss": 0.688, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.04472886398434639, |
| "rewards/margins": 0.012851757928729057, |
| "rewards/rejected": -0.0575806125998497, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 149, |
| "total_flos": 0.0, |
| "train_loss": 0.6894321513656002, |
| "train_runtime": 6902.8391, |
| "train_samples_per_second": 2.767, |
| "train_steps_per_second": 0.022 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 149, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|