{ "best_metric": 0.88608, "best_model_checkpoint": "../../checkpoint/imdb/bert-base/checkpoint-12512", "epoch": 20.0, "eval_steps": 500, "global_step": 15640, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.88012, "eval_loss": 0.2792210280895233, "eval_runtime": 34.1095, "eval_samples_per_second": 732.933, "eval_steps_per_second": 2.873, "step": 782 }, { "epoch": 1.28, "learning_rate": 4.680306905370844e-05, "loss": 0.307, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.86836, "eval_loss": 0.3073737621307373, "eval_runtime": 34.1017, "eval_samples_per_second": 733.1, "eval_steps_per_second": 2.874, "step": 1564 }, { "epoch": 2.56, "learning_rate": 4.360613810741688e-05, "loss": 0.1491, "step": 2000 }, { "epoch": 3.0, "eval_accuracy": 0.879, "eval_loss": 0.5035193562507629, "eval_runtime": 33.9308, "eval_samples_per_second": 736.793, "eval_steps_per_second": 2.888, "step": 2346 }, { "epoch": 3.84, "learning_rate": 4.040920716112532e-05, "loss": 0.0719, "step": 3000 }, { "epoch": 4.0, "eval_accuracy": 0.88072, "eval_loss": 0.5898228287696838, "eval_runtime": 34.0598, "eval_samples_per_second": 734.004, "eval_steps_per_second": 2.877, "step": 3128 }, { "epoch": 5.0, "eval_accuracy": 0.8714, "eval_loss": 0.763311505317688, "eval_runtime": 34.1843, "eval_samples_per_second": 731.33, "eval_steps_per_second": 2.867, "step": 3910 }, { "epoch": 5.12, "learning_rate": 3.721227621483376e-05, "loss": 0.0366, "step": 4000 }, { "epoch": 6.0, "eval_accuracy": 0.87656, "eval_loss": 0.6611727476119995, "eval_runtime": 34.137, "eval_samples_per_second": 732.344, "eval_steps_per_second": 2.871, "step": 4692 }, { "epoch": 6.39, "learning_rate": 3.40153452685422e-05, "loss": 0.0308, "step": 5000 }, { "epoch": 7.0, "eval_accuracy": 0.88068, "eval_loss": 0.642299234867096, "eval_runtime": 34.2471, "eval_samples_per_second": 729.99, "eval_steps_per_second": 2.862, "step": 5474 }, { "epoch": 7.67, "learning_rate": 3.081841432225064e-05, "loss": 0.0237, "step": 6000 }, { "epoch": 8.0, "eval_accuracy": 0.87888, "eval_loss": 0.9059407711029053, "eval_runtime": 34.0813, "eval_samples_per_second": 733.539, "eval_steps_per_second": 2.875, "step": 6256 }, { "epoch": 8.95, "learning_rate": 2.7621483375959077e-05, "loss": 0.015, "step": 7000 }, { "epoch": 9.0, "eval_accuracy": 0.8794, "eval_loss": 0.8373117446899414, "eval_runtime": 34.0748, "eval_samples_per_second": 733.679, "eval_steps_per_second": 2.876, "step": 7038 }, { "epoch": 10.0, "eval_accuracy": 0.879, "eval_loss": 0.9557542204856873, "eval_runtime": 34.1311, "eval_samples_per_second": 732.469, "eval_steps_per_second": 2.871, "step": 7820 }, { "epoch": 10.23, "learning_rate": 2.442455242966752e-05, "loss": 0.0155, "step": 8000 }, { "epoch": 11.0, "eval_accuracy": 0.87836, "eval_loss": 1.0240801572799683, "eval_runtime": 33.9654, "eval_samples_per_second": 736.042, "eval_steps_per_second": 2.885, "step": 8602 }, { "epoch": 11.51, "learning_rate": 2.122762148337596e-05, "loss": 0.01, "step": 9000 }, { "epoch": 12.0, "eval_accuracy": 0.88172, "eval_loss": 0.9570114016532898, "eval_runtime": 34.077, "eval_samples_per_second": 733.633, "eval_steps_per_second": 2.876, "step": 9384 }, { "epoch": 12.79, "learning_rate": 1.80306905370844e-05, "loss": 0.009, "step": 10000 }, { "epoch": 13.0, "eval_accuracy": 0.87288, "eval_loss": 1.0905340909957886, "eval_runtime": 34.2575, "eval_samples_per_second": 729.768, "eval_steps_per_second": 2.861, "step": 10166 }, { "epoch": 14.0, "eval_accuracy": 0.88284, "eval_loss": 0.8721671104431152, "eval_runtime": 34.093, "eval_samples_per_second": 733.288, "eval_steps_per_second": 2.874, "step": 10948 }, { "epoch": 14.07, "learning_rate": 1.483375959079284e-05, "loss": 0.0059, "step": 11000 }, { "epoch": 15.0, "eval_accuracy": 0.88448, "eval_loss": 1.0063711404800415, "eval_runtime": 34.0403, "eval_samples_per_second": 734.424, "eval_steps_per_second": 2.879, "step": 11730 }, { "epoch": 15.35, "learning_rate": 1.163682864450128e-05, "loss": 0.0028, "step": 12000 }, { "epoch": 16.0, "eval_accuracy": 0.88608, "eval_loss": 1.0448594093322754, "eval_runtime": 34.3573, "eval_samples_per_second": 727.648, "eval_steps_per_second": 2.852, "step": 12512 }, { "epoch": 16.62, "learning_rate": 8.439897698209718e-06, "loss": 0.0018, "step": 13000 }, { "epoch": 17.0, "eval_accuracy": 0.8828, "eval_loss": 1.1077994108200073, "eval_runtime": 34.1209, "eval_samples_per_second": 732.69, "eval_steps_per_second": 2.872, "step": 13294 }, { "epoch": 17.9, "learning_rate": 5.242966751918159e-06, "loss": 0.0014, "step": 14000 }, { "epoch": 18.0, "eval_accuracy": 0.88408, "eval_loss": 1.1377348899841309, "eval_runtime": 34.0133, "eval_samples_per_second": 735.007, "eval_steps_per_second": 2.881, "step": 14076 }, { "epoch": 19.0, "eval_accuracy": 0.88492, "eval_loss": 1.1733046770095825, "eval_runtime": 34.1578, "eval_samples_per_second": 731.898, "eval_steps_per_second": 2.869, "step": 14858 }, { "epoch": 19.18, "learning_rate": 2.0460358056265987e-06, "loss": 0.0003, "step": 15000 }, { "epoch": 20.0, "eval_accuracy": 0.8846, "eval_loss": 1.177056074142456, "eval_runtime": 34.2367, "eval_samples_per_second": 730.21, "eval_steps_per_second": 2.862, "step": 15640 }, { "epoch": 20.0, "step": 15640, "total_flos": 3.288888192e+16, "train_loss": 0.043571622534404934, "train_runtime": 2625.0815, "train_samples_per_second": 190.47, "train_steps_per_second": 5.958 } ], "logging_steps": 1000, "max_steps": 15640, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 3.288888192e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }