| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.7209302325581395, |
| "eval_steps": 500, |
| "global_step": 100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.37209302325581395, |
| "grad_norm": 0.9763292670249939, |
| "learning_rate": 1.9230769230769234e-06, |
| "logits/chosen": 1.7089074850082397, |
| "logits/rejected": 1.7024719715118408, |
| "logps/chosen": -80.31088256835938, |
| "logps/rejected": -91.45267486572266, |
| "loss": 0.692, |
| "rewards/accuracies": 0.38749998807907104, |
| "rewards/chosen": -0.0024384786374866962, |
| "rewards/margins": -0.0009521525353193283, |
| "rewards/rejected": -0.001486325403675437, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.7441860465116279, |
| "grad_norm": 0.5258393883705139, |
| "learning_rate": 3.846153846153847e-06, |
| "logits/chosen": 1.8638836145401, |
| "logits/rejected": 1.8883317708969116, |
| "logps/chosen": -81.79865264892578, |
| "logps/rejected": -76.99979400634766, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": 0.00844244472682476, |
| "rewards/margins": 0.0048658037558197975, |
| "rewards/rejected": 0.0035766414366662502, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.1162790697674418, |
| "grad_norm": 0.5254527926445007, |
| "learning_rate": 4.996395926410354e-06, |
| "logits/chosen": 1.7035324573516846, |
| "logits/rejected": 1.731885552406311, |
| "logps/chosen": -80.90572357177734, |
| "logps/rejected": -82.523193359375, |
| "loss": 0.6945, |
| "rewards/accuracies": 0.48750001192092896, |
| "rewards/chosen": -0.003143525216728449, |
| "rewards/margins": -0.0011914735659956932, |
| "rewards/rejected": -0.0019520517671480775, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.4883720930232558, |
| "grad_norm": 0.5585340261459351, |
| "learning_rate": 4.955969343539162e-06, |
| "logits/chosen": 1.7126038074493408, |
| "logits/rejected": 1.7729876041412354, |
| "logps/chosen": -82.4004898071289, |
| "logps/rejected": -86.97776794433594, |
| "loss": 0.6942, |
| "rewards/accuracies": 0.36250001192092896, |
| "rewards/chosen": -0.005664472468197346, |
| "rewards/margins": -0.014465728774666786, |
| "rewards/rejected": 0.00880125630646944, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.8604651162790697, |
| "grad_norm": 0.46961840987205505, |
| "learning_rate": 4.8713411048678635e-06, |
| "logits/chosen": 1.7948715686798096, |
| "logits/rejected": 1.836954116821289, |
| "logps/chosen": -87.7071304321289, |
| "logps/rejected": -75.44202423095703, |
| "loss": 0.693, |
| "rewards/accuracies": 0.4625000059604645, |
| "rewards/chosen": 0.0024648040998727083, |
| "rewards/margins": -0.004807753954082727, |
| "rewards/rejected": 0.007272557821124792, |
| "step": 50 |
| }, |
| { |
| "epoch": 2.2325581395348837, |
| "grad_norm": 0.48951366543769836, |
| "learning_rate": 4.744034319097536e-06, |
| "logits/chosen": 1.8686463832855225, |
| "logits/rejected": 1.831539511680603, |
| "logps/chosen": -97.39242553710938, |
| "logps/rejected": -83.82582092285156, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": 0.006232857704162598, |
| "rewards/margins": 0.006834377534687519, |
| "rewards/rejected": -0.0006015209364704788, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.604651162790698, |
| "grad_norm": 0.5472654700279236, |
| "learning_rate": 4.5763402081200295e-06, |
| "logits/chosen": 1.7030988931655884, |
| "logits/rejected": 1.7544777393341064, |
| "logps/chosen": -76.77324676513672, |
| "logps/rejected": -74.05345153808594, |
| "loss": 0.6906, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.005271740257740021, |
| "rewards/margins": -0.0019098047632724047, |
| "rewards/rejected": -0.0033619359601289034, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.9767441860465116, |
| "grad_norm": 0.5428044199943542, |
| "learning_rate": 4.3712768704277535e-06, |
| "logits/chosen": 1.7908856868743896, |
| "logits/rejected": 1.8412494659423828, |
| "logps/chosen": -81.26551818847656, |
| "logps/rejected": -90.65868377685547, |
| "loss": 0.6903, |
| "rewards/accuracies": 0.4124999940395355, |
| "rewards/chosen": -0.0007023714715614915, |
| "rewards/margins": -0.004734506830573082, |
| "rewards/rejected": 0.004032135009765625, |
| "step": 80 |
| }, |
| { |
| "epoch": 3.3488372093023258, |
| "grad_norm": 0.5832298994064331, |
| "learning_rate": 4.1325349624589625e-06, |
| "logits/chosen": 1.6744792461395264, |
| "logits/rejected": 1.7848085165023804, |
| "logps/chosen": -69.33172607421875, |
| "logps/rejected": -89.09760284423828, |
| "loss": 0.6887, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": 0.001944942632690072, |
| "rewards/margins": 0.011368460953235626, |
| "rewards/rejected": -0.009423518553376198, |
| "step": 90 |
| }, |
| { |
| "epoch": 3.7209302325581395, |
| "grad_norm": 0.5550350546836853, |
| "learning_rate": 3.8644112754862614e-06, |
| "logits/chosen": 1.8829429149627686, |
| "logits/rejected": 1.8508192300796509, |
| "logps/chosen": -87.32453918457031, |
| "logps/rejected": -90.84036254882812, |
| "loss": 0.6894, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": -0.0012000806163996458, |
| "rewards/margins": 0.0026218085549771786, |
| "rewards/rejected": -0.0038218882400542498, |
| "step": 100 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 260, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.4077597671358464e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|