| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8, |
| "eval_steps": 50, |
| "global_step": 50, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.08, |
| "grad_norm": 3.3149960041046143, |
| "learning_rate": 5e-06, |
| "logits/chosen": -0.24067819118499756, |
| "logits/rejected": -0.4968351423740387, |
| "logps/chosen": -433.0858459472656, |
| "logps/rejected": -68.33470153808594, |
| "loss": 0.1722, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.1653084754943848, |
| "rewards/margins": 1.9695370197296143, |
| "rewards/rejected": 0.19577142596244812, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.4687750339508057, |
| "learning_rate": 1e-05, |
| "logits/chosen": -0.23465164005756378, |
| "logits/rejected": -0.5149508118629456, |
| "logps/chosen": -433.3921813964844, |
| "logps/rejected": -65.57392883300781, |
| "loss": 0.1348, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 2.6356241703033447, |
| "rewards/margins": 2.478445529937744, |
| "rewards/rejected": 0.15717869997024536, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.4518139958381653, |
| "learning_rate": 1.5e-05, |
| "logits/chosen": -0.22109150886535645, |
| "logits/rejected": -0.5422734022140503, |
| "logps/chosen": -421.8102111816406, |
| "logps/rejected": -68.03514099121094, |
| "loss": 0.0432, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.584801197052002, |
| "rewards/margins": 3.5185985565185547, |
| "rewards/rejected": 0.06620248407125473, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.12663038074970245, |
| "learning_rate": 2e-05, |
| "logits/chosen": -0.21289744973182678, |
| "logits/rejected": -0.5218192338943481, |
| "logps/chosen": -414.16058349609375, |
| "logps/rejected": -72.08072662353516, |
| "loss": 0.0138, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.662674903869629, |
| "rewards/margins": 4.634940147399902, |
| "rewards/rejected": 0.02773415483534336, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.043434809893369675, |
| "learning_rate": 2.5e-05, |
| "logits/chosen": -0.1724880337715149, |
| "logits/rejected": -0.632266640663147, |
| "logps/chosen": -391.53204345703125, |
| "logps/rejected": -71.50444030761719, |
| "loss": 0.0039, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.770969390869141, |
| "rewards/margins": 5.899745464324951, |
| "rewards/rejected": -0.12877611815929413, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.012639075517654419, |
| "learning_rate": 3e-05, |
| "logits/chosen": -0.11160198599100113, |
| "logits/rejected": -0.46575218439102173, |
| "logps/chosen": -387.0035095214844, |
| "logps/rejected": -68.87992095947266, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.1022233963012695, |
| "rewards/margins": 7.20810604095459, |
| "rewards/rejected": -0.10588403791189194, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.008335842750966549, |
| "learning_rate": 3.5e-05, |
| "logits/chosen": -0.16194215416908264, |
| "logits/rejected": -0.5091412663459778, |
| "logps/chosen": -380.4056701660156, |
| "logps/rejected": -70.05772399902344, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.549788475036621, |
| "rewards/margins": 7.717337608337402, |
| "rewards/rejected": -0.16754867136478424, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.0034919867757707834, |
| "learning_rate": 4e-05, |
| "logits/chosen": -0.12451864778995514, |
| "logits/rejected": -0.4964370131492615, |
| "logps/chosen": -379.21673583984375, |
| "logps/rejected": -69.48336029052734, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.368996620178223, |
| "rewards/margins": 8.601344108581543, |
| "rewards/rejected": -0.23234805464744568, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.0023931912146508694, |
| "learning_rate": 4.5e-05, |
| "logits/chosen": -0.11491024494171143, |
| "logits/rejected": -0.5707582831382751, |
| "logps/chosen": -367.6617431640625, |
| "logps/rejected": -71.28264617919922, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.458444595336914, |
| "rewards/margins": 8.877108573913574, |
| "rewards/rejected": -0.41866397857666016, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.002425891114398837, |
| "learning_rate": 5e-05, |
| "logits/chosen": -0.1450928896665573, |
| "logits/rejected": -0.6191288232803345, |
| "logps/chosen": -371.6398620605469, |
| "logps/rejected": -75.66804504394531, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.693218231201172, |
| "rewards/margins": 9.168657302856445, |
| "rewards/rejected": -0.4754392206668854, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.8, |
| "eval_logits/chosen": -0.09945501387119293, |
| "eval_logits/rejected": -0.5488065481185913, |
| "eval_logps/chosen": -366.370849609375, |
| "eval_logps/rejected": -72.13478088378906, |
| "eval_loss": 0.0001304554898524657, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 8.9785737991333, |
| "eval_rewards/margins": 9.362648963928223, |
| "eval_rewards/rejected": -0.3840752840042114, |
| "eval_runtime": 31.4719, |
| "eval_samples_per_second": 3.177, |
| "eval_steps_per_second": 0.794, |
| "step": 50 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 186, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|