| { |
| "best_global_step": 4000, |
| "best_metric": 0.07567641884088516, |
| "best_model_checkpoint": "./training_output/checkpoint-4000", |
| "epoch": 0.35, |
| "eval_steps": 1000, |
| "global_step": 7000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.025, |
| "grad_norm": 0.8834348917007446, |
| "learning_rate": 1.9501000000000002e-05, |
| "loss": 0.1437, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 9.513919830322266, |
| "learning_rate": 1.9001e-05, |
| "loss": 0.1085, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_accuracy": 0.91835, |
| "eval_loss": 0.10802757740020752, |
| "eval_runtime": 381.1922, |
| "eval_samples_per_second": 52.467, |
| "eval_steps_per_second": 3.279, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 0.013394818641245365, |
| "learning_rate": 1.8501e-05, |
| "loss": 0.0965, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.017180055379867554, |
| "learning_rate": 1.8001000000000003e-05, |
| "loss": 0.0716, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_accuracy": 0.8473, |
| "eval_loss": 0.25237375497817993, |
| "eval_runtime": 381.1616, |
| "eval_samples_per_second": 52.471, |
| "eval_steps_per_second": 3.279, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 0.017619747668504715, |
| "learning_rate": 1.7501e-05, |
| "loss": 0.0658, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.527113676071167, |
| "learning_rate": 1.7001000000000002e-05, |
| "loss": 0.0615, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.15, |
| "eval_accuracy": 0.9299, |
| "eval_loss": 0.11818733811378479, |
| "eval_runtime": 381.4889, |
| "eval_samples_per_second": 52.426, |
| "eval_steps_per_second": 3.277, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.175, |
| "grad_norm": 140.78282165527344, |
| "learning_rate": 1.6501e-05, |
| "loss": 0.056, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.9989501237869263, |
| "learning_rate": 1.6001e-05, |
| "loss": 0.0648, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_accuracy": 0.9498, |
| "eval_loss": 0.07567641884088516, |
| "eval_runtime": 380.8034, |
| "eval_samples_per_second": 52.521, |
| "eval_steps_per_second": 3.283, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.225, |
| "grad_norm": 0.01812303625047207, |
| "learning_rate": 1.5501000000000003e-05, |
| "loss": 0.0487, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.05552659556269646, |
| "learning_rate": 1.5001000000000001e-05, |
| "loss": 0.0522, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_accuracy": 0.92725, |
| "eval_loss": 0.12006673216819763, |
| "eval_runtime": 380.6188, |
| "eval_samples_per_second": 52.546, |
| "eval_steps_per_second": 3.284, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.275, |
| "grad_norm": 0.14319103956222534, |
| "learning_rate": 1.4501e-05, |
| "loss": 0.0554, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.012562028132379055, |
| "learning_rate": 1.4001e-05, |
| "loss": 0.0377, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.3, |
| "eval_accuracy": 0.95545, |
| "eval_loss": 0.08464282751083374, |
| "eval_runtime": 380.6212, |
| "eval_samples_per_second": 52.546, |
| "eval_steps_per_second": 3.284, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.325, |
| "grad_norm": 0.0012805273290723562, |
| "learning_rate": 1.3501000000000002e-05, |
| "loss": 0.0327, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.024555200710892677, |
| "learning_rate": 1.3001000000000001e-05, |
| "loss": 0.0447, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.35, |
| "eval_accuracy": 0.93225, |
| "eval_loss": 0.10355959832668304, |
| "eval_runtime": 380.8232, |
| "eval_samples_per_second": 52.518, |
| "eval_steps_per_second": 3.282, |
| "step": 7000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 20000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7367109550080000.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|