| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 30, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 0.7782864570617676, |
| "learning_rate": 9.966191788709714e-07, |
| "log_odds_chosen": 0.7443415522575378, |
| "log_odds_ratio": -0.4570655822753906, |
| "logits/chosen": 3.981205701828003, |
| "logits/rejected": 4.507437705993652, |
| "logps/chosen": -1.3964179754257202, |
| "logps/rejected": -2.015876054763794, |
| "loss": 2.3017, |
| "nll_loss": 2.2559781074523926, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.13964179158210754, |
| "rewards/margins": 0.06194580718874931, |
| "rewards/rejected": -0.20158760249614716, |
| "step": 5 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.1227214336395264, |
| "learning_rate": 8.83022221559489e-07, |
| "log_odds_chosen": 0.642013669013977, |
| "log_odds_ratio": -0.48525574803352356, |
| "logits/chosen": 4.06285285949707, |
| "logits/rejected": 4.569836139678955, |
| "logps/chosen": -1.4217931032180786, |
| "logps/rejected": -1.9608324766159058, |
| "loss": 2.3875, |
| "nll_loss": 2.2945656776428223, |
| "rewards/accuracies": 0.7428571581840515, |
| "rewards/chosen": -0.14217932522296906, |
| "rewards/margins": 0.05390391871333122, |
| "rewards/rejected": -0.19608324766159058, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.5333333333333332, |
| "grad_norm": 0.7834094762802124, |
| "learning_rate": 6.434016163555451e-07, |
| "log_odds_chosen": 0.8146808743476868, |
| "log_odds_ratio": -0.4321725368499756, |
| "logits/chosen": 4.006339073181152, |
| "logits/rejected": 4.592737197875977, |
| "logps/chosen": -1.3805153369903564, |
| "logps/rejected": -2.061169147491455, |
| "loss": 2.3073, |
| "nll_loss": 2.2640388011932373, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.13805153965950012, |
| "rewards/margins": 0.06806538999080658, |
| "rewards/rejected": -0.2061169445514679, |
| "step": 15 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.8477768301963806, |
| "learning_rate": 3.56598383644455e-07, |
| "log_odds_chosen": 0.5591691136360168, |
| "log_odds_ratio": -0.5144891738891602, |
| "logits/chosen": 4.0338454246521, |
| "logits/rejected": 4.464445114135742, |
| "logps/chosen": -1.436565637588501, |
| "logps/rejected": -1.9033842086791992, |
| "loss": 2.3271, |
| "nll_loss": 2.283417224884033, |
| "rewards/accuracies": 0.7428571581840515, |
| "rewards/chosen": -0.14365655183792114, |
| "rewards/margins": 0.046681854873895645, |
| "rewards/rejected": -0.19033841788768768, |
| "step": 20 |
| }, |
| { |
| "epoch": 2.533333333333333, |
| "grad_norm": 0.7918501496315002, |
| "learning_rate": 1.1697777844051104e-07, |
| "log_odds_chosen": 0.7850446105003357, |
| "log_odds_ratio": -0.4418957233428955, |
| "logits/chosen": 3.9244980812072754, |
| "logits/rejected": 4.572809219360352, |
| "logps/chosen": -1.3659677505493164, |
| "logps/rejected": -2.012371301651001, |
| "loss": 2.2538, |
| "nll_loss": 2.2095985412597656, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.13659678399562836, |
| "rewards/margins": 0.0646403506398201, |
| "rewards/rejected": -0.20123712718486786, |
| "step": 25 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.1368457078933716, |
| "learning_rate": 3.380821129028488e-09, |
| "log_odds_chosen": 0.5917825102806091, |
| "log_odds_ratio": -0.5039272904396057, |
| "logits/chosen": 4.121052265167236, |
| "logits/rejected": 4.495968818664551, |
| "logps/chosen": -1.455165982246399, |
| "logps/rejected": -1.9601490497589111, |
| "loss": 2.3857, |
| "nll_loss": 2.3466548919677734, |
| "rewards/accuracies": 0.6857143044471741, |
| "rewards/chosen": -0.14551660418510437, |
| "rewards/margins": 0.05049830302596092, |
| "rewards/rejected": -0.1960149109363556, |
| "step": 30 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 30, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|