| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9942196531791907, | |
| "eval_steps": 500, | |
| "global_step": 43, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06936416184971098, | |
| "grad_norm": 2.391996383666992, | |
| "learning_rate": 6e-07, | |
| "logits/chosen": -0.5742005109786987, | |
| "logits/rejected": -0.6095317602157593, | |
| "logps/chosen": -2.351984739303589, | |
| "logps/rejected": -7.036094665527344, | |
| "loss": 0.6932, | |
| "rewards/accuracies": 0.1666666716337204, | |
| "rewards/chosen": -0.000525117851793766, | |
| "rewards/margins": -0.00012056056584697217, | |
| "rewards/rejected": -0.0004045573004987091, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.13872832369942195, | |
| "grad_norm": 2.0094051361083984, | |
| "learning_rate": 9.736842105263158e-07, | |
| "logits/chosen": -0.4133426547050476, | |
| "logits/rejected": -0.44120118021965027, | |
| "logps/chosen": -3.0542521476745605, | |
| "logps/rejected": -4.8423871994018555, | |
| "loss": 0.6962, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.006540569476783276, | |
| "rewards/margins": -0.0059411413967609406, | |
| "rewards/rejected": -0.0005994289531372488, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.20809248554913296, | |
| "grad_norm": 1.878108263015747, | |
| "learning_rate": 8.947368421052631e-07, | |
| "logits/chosen": -0.3521636426448822, | |
| "logits/rejected": -0.37608397006988525, | |
| "logps/chosen": -3.7146034240722656, | |
| "logps/rejected": -4.189270973205566, | |
| "loss": 0.6968, | |
| "rewards/accuracies": 0.4583333432674408, | |
| "rewards/chosen": -0.007832100614905357, | |
| "rewards/margins": -0.007222745567560196, | |
| "rewards/rejected": -0.0006093545234762132, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.2774566473988439, | |
| "grad_norm": 2.917581796646118, | |
| "learning_rate": 8.157894736842105e-07, | |
| "logits/chosen": -0.4040801525115967, | |
| "logits/rejected": -0.43401893973350525, | |
| "logps/chosen": -3.188246488571167, | |
| "logps/rejected": -5.506511688232422, | |
| "loss": 0.6937, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.00289311446249485, | |
| "rewards/margins": -0.0010533285094425082, | |
| "rewards/rejected": 0.003946444019675255, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.3468208092485549, | |
| "grad_norm": 3.311497688293457, | |
| "learning_rate": 7.368421052631578e-07, | |
| "logits/chosen": -0.3706355690956116, | |
| "logits/rejected": -0.4231971502304077, | |
| "logps/chosen": -2.4997897148132324, | |
| "logps/rejected": -5.469825744628906, | |
| "loss": 0.693, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.0005652035470120609, | |
| "rewards/margins": 0.0003093027917202562, | |
| "rewards/rejected": 0.00025590075529180467, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.4161849710982659, | |
| "grad_norm": 1.4025365114212036, | |
| "learning_rate": 6.578947368421053e-07, | |
| "logits/chosen": -0.21497611701488495, | |
| "logits/rejected": -0.24890606105327606, | |
| "logps/chosen": -2.1334173679351807, | |
| "logps/rejected": -4.718679428100586, | |
| "loss": 0.6913, | |
| "rewards/accuracies": 0.4583333432674408, | |
| "rewards/chosen": -0.0012112647527828813, | |
| "rewards/margins": 0.00380739476531744, | |
| "rewards/rejected": -0.005018658936023712, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.48554913294797686, | |
| "grad_norm": 2.704420328140259, | |
| "learning_rate": 5.789473684210526e-07, | |
| "logits/chosen": -0.3268830180168152, | |
| "logits/rejected": -0.36747950315475464, | |
| "logps/chosen": -3.465270519256592, | |
| "logps/rejected": -5.636588096618652, | |
| "loss": 0.6992, | |
| "rewards/accuracies": 0.2916666865348816, | |
| "rewards/chosen": -0.009067912586033344, | |
| "rewards/margins": -0.01196976751089096, | |
| "rewards/rejected": 0.0029018563218414783, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.5549132947976878, | |
| "grad_norm": 2.6874353885650635, | |
| "learning_rate": 5e-07, | |
| "logits/chosen": -0.34339720010757446, | |
| "logits/rejected": -0.3832819163799286, | |
| "logps/chosen": -2.2344460487365723, | |
| "logps/rejected": -4.726414680480957, | |
| "loss": 0.6943, | |
| "rewards/accuracies": 0.4583333432674408, | |
| "rewards/chosen": 0.004240160807967186, | |
| "rewards/margins": -0.002244190312922001, | |
| "rewards/rejected": 0.006484351586550474, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.6242774566473989, | |
| "grad_norm": 4.569401741027832, | |
| "learning_rate": 4.2105263157894733e-07, | |
| "logits/chosen": -0.5243338346481323, | |
| "logits/rejected": -0.5355302691459656, | |
| "logps/chosen": -2.862309217453003, | |
| "logps/rejected": -4.5991034507751465, | |
| "loss": 0.689, | |
| "rewards/accuracies": 0.6666666865348816, | |
| "rewards/chosen": 0.004931977950036526, | |
| "rewards/margins": 0.008542876690626144, | |
| "rewards/rejected": -0.003610898507758975, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.6936416184971098, | |
| "grad_norm": 2.8011696338653564, | |
| "learning_rate": 3.4210526315789473e-07, | |
| "logits/chosen": -0.44834446907043457, | |
| "logits/rejected": -0.4534645080566406, | |
| "logps/chosen": -4.190229415893555, | |
| "logps/rejected": -5.262238502502441, | |
| "loss": 0.6922, | |
| "rewards/accuracies": 0.5416666865348816, | |
| "rewards/chosen": 0.009879859164357185, | |
| "rewards/margins": 0.0019751894287765026, | |
| "rewards/rejected": 0.007904671132564545, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.7630057803468208, | |
| "grad_norm": 3.9597043991088867, | |
| "learning_rate": 2.631578947368421e-07, | |
| "logits/chosen": -0.3376007080078125, | |
| "logits/rejected": -0.3533641993999481, | |
| "logps/chosen": -3.724114179611206, | |
| "logps/rejected": -5.412773132324219, | |
| "loss": 0.6925, | |
| "rewards/accuracies": 0.5833333730697632, | |
| "rewards/chosen": -0.005668423604220152, | |
| "rewards/margins": 0.0014709922252222896, | |
| "rewards/rejected": -0.007139415945857763, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.8323699421965318, | |
| "grad_norm": 2.256436586380005, | |
| "learning_rate": 1.8421052631578946e-07, | |
| "logits/chosen": -0.30911755561828613, | |
| "logits/rejected": -0.31694161891937256, | |
| "logps/chosen": -2.6621932983398438, | |
| "logps/rejected": -4.157691478729248, | |
| "loss": 0.6959, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.00074323161970824, | |
| "rewards/margins": -0.005398334003984928, | |
| "rewards/rejected": 0.004655101802200079, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.9017341040462428, | |
| "grad_norm": 2.0652520656585693, | |
| "learning_rate": 1.0526315789473683e-07, | |
| "logits/chosen": -0.31367242336273193, | |
| "logits/rejected": -0.33622536063194275, | |
| "logps/chosen": -3.3133456707000732, | |
| "logps/rejected": -6.765153408050537, | |
| "loss": 0.6929, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.01026303879916668, | |
| "rewards/margins": 0.0005472122575156391, | |
| "rewards/rejected": -0.010810251347720623, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.9710982658959537, | |
| "grad_norm": 2.498948335647583, | |
| "learning_rate": 2.6315789473684208e-08, | |
| "logits/chosen": -0.3583389222621918, | |
| "logits/rejected": -0.3821001946926117, | |
| "logps/chosen": -3.387718677520752, | |
| "logps/rejected": -5.769498348236084, | |
| "loss": 0.6922, | |
| "rewards/accuracies": 0.5416666865348816, | |
| "rewards/chosen": 0.0014158273115754128, | |
| "rewards/margins": 0.0019499189220368862, | |
| "rewards/rejected": -0.0005340920761227608, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.9942196531791907, | |
| "step": 43, | |
| "total_flos": 8626483251118080.0, | |
| "train_loss": 0.6937350367390832, | |
| "train_runtime": 231.1609, | |
| "train_samples_per_second": 1.492, | |
| "train_steps_per_second": 0.186 | |
| } | |
| ], | |
| "logging_steps": 3, | |
| "max_steps": 43, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8626483251118080.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |