| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 78, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1282051282051282, |
| "grad_norm": 35.16144561767578, |
| "learning_rate": 4.997482666353286e-07, |
| "logits/chosen": -4.892792224884033, |
| "logits/rejected": -4.889645099639893, |
| "logps/chosen": -109.9691162109375, |
| "logps/rejected": -118.3992919921875, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.4124999940395355, |
| "rewards/chosen": 0.005595622584223747, |
| "rewards/margins": 0.0018954325933009386, |
| "rewards/rejected": 0.0037001892924308777, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "grad_norm": 33.09076690673828, |
| "learning_rate": 4.7014888296418447e-07, |
| "logits/chosen": -4.896066665649414, |
| "logits/rejected": -4.894492149353027, |
| "logps/chosen": -109.54179382324219, |
| "logps/rejected": -114.97733306884766, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -0.010609785094857216, |
| "rewards/margins": 0.0020811271388083696, |
| "rewards/rejected": -0.012690911069512367, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.38461538461538464, |
| "grad_norm": 30.685495376586914, |
| "learning_rate": 3.9694631307311825e-07, |
| "logits/chosen": -4.896415710449219, |
| "logits/rejected": -4.894046306610107, |
| "logps/chosen": -107.42914581298828, |
| "logps/rejected": -115.08940887451172, |
| "loss": 0.6812, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.007030453532934189, |
| "rewards/margins": 0.02506021037697792, |
| "rewards/rejected": -0.03209066763520241, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.5128205128205128, |
| "grad_norm": 32.84953308105469, |
| "learning_rate": 2.9463922369965915e-07, |
| "logits/chosen": -4.89896297454834, |
| "logits/rejected": -4.89543342590332, |
| "logps/chosen": -110.29290771484375, |
| "logps/rejected": -118.01644897460938, |
| "loss": 0.6726, |
| "rewards/accuracies": 0.6624999642372131, |
| "rewards/chosen": -0.025739433243870735, |
| "rewards/margins": 0.04335205629467964, |
| "rewards/rejected": -0.06909148395061493, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.6410256410256411, |
| "grad_norm": 30.98584747314453, |
| "learning_rate": 1.8349078860833124e-07, |
| "logits/chosen": -4.878735542297363, |
| "logits/rejected": -4.874364852905273, |
| "logps/chosen": -111.37123107910156, |
| "logps/rejected": -120.30442810058594, |
| "loss": 0.6678, |
| "rewards/accuracies": 0.6937500834465027, |
| "rewards/chosen": -0.05289134010672569, |
| "rewards/margins": 0.05404452234506607, |
| "rewards/rejected": -0.10693586617708206, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 28.358613967895508, |
| "learning_rate": 8.551531851507185e-08, |
| "logits/chosen": -4.883856773376465, |
| "logits/rejected": -4.879419326782227, |
| "logps/chosen": -109.68328094482422, |
| "logps/rejected": -116.46817779541016, |
| "loss": 0.6593, |
| "rewards/accuracies": 0.731249988079071, |
| "rewards/chosen": -0.046140156686306, |
| "rewards/margins": 0.07184445112943649, |
| "rewards/rejected": -0.11798460781574249, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8974358974358975, |
| "grad_norm": 28.672161102294922, |
| "learning_rate": 2.0118056862137354e-08, |
| "logits/chosen": -4.915767669677734, |
| "logits/rejected": -4.912926197052002, |
| "logps/chosen": -107.08367919921875, |
| "logps/rejected": -116.36421966552734, |
| "loss": 0.6584, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.06524230539798737, |
| "rewards/margins": 0.07396502792835236, |
| "rewards/rejected": -0.13920733332633972, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 78, |
| "total_flos": 31921202790400.0, |
| "train_loss": 0.6738651104462452, |
| "train_runtime": 50989.9065, |
| "train_samples_per_second": 0.024, |
| "train_steps_per_second": 0.002 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 78, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 31921202790400.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|