| { | |
| "best_global_step": 9000, | |
| "best_metric": 0.04935265704989433, | |
| "best_model_checkpoint": "./training_output/checkpoint-9000", | |
| "epoch": 0.5, | |
| "eval_steps": 1000, | |
| "global_step": 10000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.025, | |
| "grad_norm": 0.8834348917007446, | |
| "learning_rate": 1.9501000000000002e-05, | |
| "loss": 0.1437, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 9.513919830322266, | |
| "learning_rate": 1.9001e-05, | |
| "loss": 0.1085, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.91835, | |
| "eval_loss": 0.10802757740020752, | |
| "eval_runtime": 381.1922, | |
| "eval_samples_per_second": 52.467, | |
| "eval_steps_per_second": 3.279, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.075, | |
| "grad_norm": 0.013394818641245365, | |
| "learning_rate": 1.8501e-05, | |
| "loss": 0.0965, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.017180055379867554, | |
| "learning_rate": 1.8001000000000003e-05, | |
| "loss": 0.0716, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.8473, | |
| "eval_loss": 0.25237375497817993, | |
| "eval_runtime": 381.1616, | |
| "eval_samples_per_second": 52.471, | |
| "eval_steps_per_second": 3.279, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.125, | |
| "grad_norm": 0.017619747668504715, | |
| "learning_rate": 1.7501e-05, | |
| "loss": 0.0658, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 0.527113676071167, | |
| "learning_rate": 1.7001000000000002e-05, | |
| "loss": 0.0615, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 0.9299, | |
| "eval_loss": 0.11818733811378479, | |
| "eval_runtime": 381.4889, | |
| "eval_samples_per_second": 52.426, | |
| "eval_steps_per_second": 3.277, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.175, | |
| "grad_norm": 140.78282165527344, | |
| "learning_rate": 1.6501e-05, | |
| "loss": 0.056, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.9989501237869263, | |
| "learning_rate": 1.6001e-05, | |
| "loss": 0.0648, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_accuracy": 0.9498, | |
| "eval_loss": 0.07567641884088516, | |
| "eval_runtime": 380.8034, | |
| "eval_samples_per_second": 52.521, | |
| "eval_steps_per_second": 3.283, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.225, | |
| "grad_norm": 0.01812303625047207, | |
| "learning_rate": 1.5501000000000003e-05, | |
| "loss": 0.0487, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.05552659556269646, | |
| "learning_rate": 1.5001000000000001e-05, | |
| "loss": 0.0522, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.92725, | |
| "eval_loss": 0.12006673216819763, | |
| "eval_runtime": 380.6188, | |
| "eval_samples_per_second": 52.546, | |
| "eval_steps_per_second": 3.284, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.275, | |
| "grad_norm": 0.14319103956222534, | |
| "learning_rate": 1.4501e-05, | |
| "loss": 0.0554, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.012562028132379055, | |
| "learning_rate": 1.4001e-05, | |
| "loss": 0.0377, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_accuracy": 0.95545, | |
| "eval_loss": 0.08464282751083374, | |
| "eval_runtime": 380.6212, | |
| "eval_samples_per_second": 52.546, | |
| "eval_steps_per_second": 3.284, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.325, | |
| "grad_norm": 0.0012805273290723562, | |
| "learning_rate": 1.3501000000000002e-05, | |
| "loss": 0.0327, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 0.024555200710892677, | |
| "learning_rate": 1.3001000000000001e-05, | |
| "loss": 0.0447, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_accuracy": 0.93225, | |
| "eval_loss": 0.10355959832668304, | |
| "eval_runtime": 380.8232, | |
| "eval_samples_per_second": 52.518, | |
| "eval_steps_per_second": 3.282, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.375, | |
| "grad_norm": 0.10476179420948029, | |
| "learning_rate": 1.2501000000000001e-05, | |
| "loss": 0.0379, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.16991844773292542, | |
| "learning_rate": 1.2001e-05, | |
| "loss": 0.0421, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_accuracy": 0.89145, | |
| "eval_loss": 0.1804238110780716, | |
| "eval_runtime": 380.9125, | |
| "eval_samples_per_second": 52.505, | |
| "eval_steps_per_second": 3.282, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.425, | |
| "grad_norm": 0.017685526981949806, | |
| "learning_rate": 1.1501e-05, | |
| "loss": 0.0384, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 16.736478805541992, | |
| "learning_rate": 1.1001000000000002e-05, | |
| "loss": 0.0364, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_accuracy": 0.96285, | |
| "eval_loss": 0.04935265704989433, | |
| "eval_runtime": 380.5923, | |
| "eval_samples_per_second": 52.55, | |
| "eval_steps_per_second": 3.284, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.475, | |
| "grad_norm": 0.08418703079223633, | |
| "learning_rate": 1.0501000000000002e-05, | |
| "loss": 0.0262, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.19593098759651184, | |
| "learning_rate": 1.0001000000000001e-05, | |
| "loss": 0.0301, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.9689, | |
| "eval_loss": 0.05831901729106903, | |
| "eval_runtime": 380.4459, | |
| "eval_samples_per_second": 52.57, | |
| "eval_steps_per_second": 3.286, | |
| "step": 10000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 20000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.05244422144e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |