{ "best_global_step": 9000, "best_metric": 0.04935265704989433, "best_model_checkpoint": "./training_output/checkpoint-9000", "epoch": 0.5, "eval_steps": 1000, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.025, "grad_norm": 0.8834348917007446, "learning_rate": 1.9501000000000002e-05, "loss": 0.1437, "step": 500 }, { "epoch": 0.05, "grad_norm": 9.513919830322266, "learning_rate": 1.9001e-05, "loss": 0.1085, "step": 1000 }, { "epoch": 0.05, "eval_accuracy": 0.91835, "eval_loss": 0.10802757740020752, "eval_runtime": 381.1922, "eval_samples_per_second": 52.467, "eval_steps_per_second": 3.279, "step": 1000 }, { "epoch": 0.075, "grad_norm": 0.013394818641245365, "learning_rate": 1.8501e-05, "loss": 0.0965, "step": 1500 }, { "epoch": 0.1, "grad_norm": 0.017180055379867554, "learning_rate": 1.8001000000000003e-05, "loss": 0.0716, "step": 2000 }, { "epoch": 0.1, "eval_accuracy": 0.8473, "eval_loss": 0.25237375497817993, "eval_runtime": 381.1616, "eval_samples_per_second": 52.471, "eval_steps_per_second": 3.279, "step": 2000 }, { "epoch": 0.125, "grad_norm": 0.017619747668504715, "learning_rate": 1.7501e-05, "loss": 0.0658, "step": 2500 }, { "epoch": 0.15, "grad_norm": 0.527113676071167, "learning_rate": 1.7001000000000002e-05, "loss": 0.0615, "step": 3000 }, { "epoch": 0.15, "eval_accuracy": 0.9299, "eval_loss": 0.11818733811378479, "eval_runtime": 381.4889, "eval_samples_per_second": 52.426, "eval_steps_per_second": 3.277, "step": 3000 }, { "epoch": 0.175, "grad_norm": 140.78282165527344, "learning_rate": 1.6501e-05, "loss": 0.056, "step": 3500 }, { "epoch": 0.2, "grad_norm": 0.9989501237869263, "learning_rate": 1.6001e-05, "loss": 0.0648, "step": 4000 }, { "epoch": 0.2, "eval_accuracy": 0.9498, "eval_loss": 0.07567641884088516, "eval_runtime": 380.8034, "eval_samples_per_second": 52.521, "eval_steps_per_second": 3.283, "step": 4000 }, { "epoch": 0.225, "grad_norm": 0.01812303625047207, "learning_rate": 1.5501000000000003e-05, "loss": 0.0487, "step": 4500 }, { "epoch": 0.25, "grad_norm": 0.05552659556269646, "learning_rate": 1.5001000000000001e-05, "loss": 0.0522, "step": 5000 }, { "epoch": 0.25, "eval_accuracy": 0.92725, "eval_loss": 0.12006673216819763, "eval_runtime": 380.6188, "eval_samples_per_second": 52.546, "eval_steps_per_second": 3.284, "step": 5000 }, { "epoch": 0.275, "grad_norm": 0.14319103956222534, "learning_rate": 1.4501e-05, "loss": 0.0554, "step": 5500 }, { "epoch": 0.3, "grad_norm": 0.012562028132379055, "learning_rate": 1.4001e-05, "loss": 0.0377, "step": 6000 }, { "epoch": 0.3, "eval_accuracy": 0.95545, "eval_loss": 0.08464282751083374, "eval_runtime": 380.6212, "eval_samples_per_second": 52.546, "eval_steps_per_second": 3.284, "step": 6000 }, { "epoch": 0.325, "grad_norm": 0.0012805273290723562, "learning_rate": 1.3501000000000002e-05, "loss": 0.0327, "step": 6500 }, { "epoch": 0.35, "grad_norm": 0.024555200710892677, "learning_rate": 1.3001000000000001e-05, "loss": 0.0447, "step": 7000 }, { "epoch": 0.35, "eval_accuracy": 0.93225, "eval_loss": 0.10355959832668304, "eval_runtime": 380.8232, "eval_samples_per_second": 52.518, "eval_steps_per_second": 3.282, "step": 7000 }, { "epoch": 0.375, "grad_norm": 0.10476179420948029, "learning_rate": 1.2501000000000001e-05, "loss": 0.0379, "step": 7500 }, { "epoch": 0.4, "grad_norm": 0.16991844773292542, "learning_rate": 1.2001e-05, "loss": 0.0421, "step": 8000 }, { "epoch": 0.4, "eval_accuracy": 0.89145, "eval_loss": 0.1804238110780716, "eval_runtime": 380.9125, "eval_samples_per_second": 52.505, "eval_steps_per_second": 3.282, "step": 8000 }, { "epoch": 0.425, "grad_norm": 0.017685526981949806, "learning_rate": 1.1501e-05, "loss": 0.0384, "step": 8500 }, { "epoch": 0.45, "grad_norm": 16.736478805541992, "learning_rate": 1.1001000000000002e-05, "loss": 0.0364, "step": 9000 }, { "epoch": 0.45, "eval_accuracy": 0.96285, "eval_loss": 0.04935265704989433, "eval_runtime": 380.5923, "eval_samples_per_second": 52.55, "eval_steps_per_second": 3.284, "step": 9000 }, { "epoch": 0.475, "grad_norm": 0.08418703079223633, "learning_rate": 1.0501000000000002e-05, "loss": 0.0262, "step": 9500 }, { "epoch": 0.5, "grad_norm": 0.19593098759651184, "learning_rate": 1.0001000000000001e-05, "loss": 0.0301, "step": 10000 }, { "epoch": 0.5, "eval_accuracy": 0.9689, "eval_loss": 0.05831901729106903, "eval_runtime": 380.4459, "eval_samples_per_second": 52.57, "eval_steps_per_second": 3.286, "step": 10000 } ], "logging_steps": 500, "max_steps": 20000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.05244422144e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }