{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 78, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1282051282051282, "grad_norm": 35.16144561767578, "learning_rate": 4.997482666353286e-07, "logits/chosen": -4.892792224884033, "logits/rejected": -4.889645099639893, "logps/chosen": -109.9691162109375, "logps/rejected": -118.3992919921875, "loss": 0.6925, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": 0.005595622584223747, "rewards/margins": 0.0018954325933009386, "rewards/rejected": 0.0037001892924308777, "step": 10 }, { "epoch": 0.2564102564102564, "grad_norm": 33.09076690673828, "learning_rate": 4.7014888296418447e-07, "logits/chosen": -4.896066665649414, "logits/rejected": -4.894492149353027, "logps/chosen": -109.54179382324219, "logps/rejected": -114.97733306884766, "loss": 0.6925, "rewards/accuracies": 0.53125, "rewards/chosen": -0.010609785094857216, "rewards/margins": 0.0020811271388083696, "rewards/rejected": -0.012690911069512367, "step": 20 }, { "epoch": 0.38461538461538464, "grad_norm": 30.685495376586914, "learning_rate": 3.9694631307311825e-07, "logits/chosen": -4.896415710449219, "logits/rejected": -4.894046306610107, "logps/chosen": -107.42914581298828, "logps/rejected": -115.08940887451172, "loss": 0.6812, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.007030453532934189, "rewards/margins": 0.02506021037697792, "rewards/rejected": -0.03209066763520241, "step": 30 }, { "epoch": 0.5128205128205128, "grad_norm": 32.84953308105469, "learning_rate": 2.9463922369965915e-07, "logits/chosen": -4.89896297454834, "logits/rejected": -4.89543342590332, "logps/chosen": -110.29290771484375, "logps/rejected": -118.01644897460938, "loss": 0.6726, "rewards/accuracies": 0.6624999642372131, "rewards/chosen": -0.025739433243870735, "rewards/margins": 0.04335205629467964, "rewards/rejected": -0.06909148395061493, "step": 40 }, { "epoch": 0.6410256410256411, "grad_norm": 30.98584747314453, "learning_rate": 1.8349078860833124e-07, "logits/chosen": -4.878735542297363, "logits/rejected": -4.874364852905273, "logps/chosen": -111.37123107910156, "logps/rejected": -120.30442810058594, "loss": 0.6678, "rewards/accuracies": 0.6937500834465027, "rewards/chosen": -0.05289134010672569, "rewards/margins": 0.05404452234506607, "rewards/rejected": -0.10693586617708206, "step": 50 }, { "epoch": 0.7692307692307693, "grad_norm": 28.358613967895508, "learning_rate": 8.551531851507185e-08, "logits/chosen": -4.883856773376465, "logits/rejected": -4.879419326782227, "logps/chosen": -109.68328094482422, "logps/rejected": -116.46817779541016, "loss": 0.6593, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.046140156686306, "rewards/margins": 0.07184445112943649, "rewards/rejected": -0.11798460781574249, "step": 60 }, { "epoch": 0.8974358974358975, "grad_norm": 28.672161102294922, "learning_rate": 2.0118056862137354e-08, "logits/chosen": -4.915767669677734, "logits/rejected": -4.912926197052002, "logps/chosen": -107.08367919921875, "logps/rejected": -116.36421966552734, "loss": 0.6584, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06524230539798737, "rewards/margins": 0.07396502792835236, "rewards/rejected": -0.13920733332633972, "step": 70 }, { "epoch": 1.0, "step": 78, "total_flos": 31921202790400.0, "train_loss": 0.6738651104462452, "train_runtime": 50989.9065, "train_samples_per_second": 0.024, "train_steps_per_second": 0.002 } ], "logging_steps": 10, "max_steps": 78, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 31921202790400.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }