{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 522, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "global_step": 20, "learning_rate": 3.7735849056603776e-05, "loss": 0.2804, "step": 20 }, { "epoch": 0.23, "global_step": 40, "learning_rate": 7.547169811320755e-05, "loss": 0.2879, "step": 40 }, { "epoch": 0.34, "global_step": 60, "learning_rate": 0.0001, "loss": 0.2889, "step": 60 }, { "epoch": 0.46, "global_step": 80, "learning_rate": 0.0001, "loss": 0.2898, "step": 80 }, { "epoch": 0.57, "global_step": 100, "learning_rate": 0.0001, "loss": 0.2622, "step": 100 }, { "epoch": 0.69, "global_step": 120, "learning_rate": 0.0001, "loss": 0.2734, "step": 120 }, { "epoch": 0.8, "global_step": 140, "learning_rate": 0.0001, "loss": 0.2923, "step": 140 }, { "epoch": 0.92, "global_step": 160, "learning_rate": 0.0001, "loss": 0.2827, "step": 160 }, { "epoch": 1.03, "global_step": 180, "learning_rate": 0.0001, "loss": 0.2054, "step": 180 }, { "epoch": 1.15, "global_step": 200, "learning_rate": 0.0001, "loss": 0.1518, "step": 200 }, { "epoch": 1.26, "global_step": 220, "learning_rate": 0.0001, "loss": 0.1561, "step": 220 }, { "epoch": 1.38, "global_step": 240, "learning_rate": 0.0001, "loss": 0.1274, "step": 240 }, { "epoch": 1.49, "global_step": 260, "learning_rate": 0.0001, "loss": 0.1318, "step": 260 }, { "epoch": 1.61, "global_step": 280, "learning_rate": 0.0001, "loss": 0.1201, "step": 280 }, { "epoch": 1.72, "global_step": 300, "learning_rate": 0.0001, "loss": 0.1661, "step": 300 }, { "epoch": 1.84, "global_step": 320, "learning_rate": 0.0001, "loss": 0.1922, "step": 320 }, { "epoch": 1.95, "global_step": 340, "learning_rate": 0.0001, "loss": 0.1123, "step": 340 }, { "epoch": 2.07, "global_step": 360, "learning_rate": 0.0001, "loss": 0.0836, "step": 360 }, { "epoch": 2.18, "global_step": 380, "learning_rate": 0.0001, "loss": 0.0435, "step": 380 }, { "epoch": 2.3, "global_step": 400, "learning_rate": 0.0001, "loss": 0.0541, "step": 400 }, { "epoch": 2.41, "global_step": 420, "learning_rate": 0.0001, "loss": 0.0895, "step": 420 }, { "epoch": 2.53, "global_step": 440, "learning_rate": 0.0001, "loss": 0.0636, "step": 440 }, { "epoch": 2.64, "global_step": 460, "learning_rate": 0.0001, "loss": 0.0681, "step": 460 }, { "epoch": 2.76, "global_step": 480, "learning_rate": 0.0001, "loss": 0.0669, "step": 480 }, { "epoch": 2.87, "global_step": 500, "learning_rate": 0.0001, "loss": 0.0711, "step": 500 }, { "epoch": 2.99, "global_step": 520, "learning_rate": 0.0001, "loss": 0.0772, "step": 520 }, { "epoch": 3.0, "step": 522, "total_flos": 2.6811251459044147e+17, "train_loss": 0.16264161262018928, "train_runtime": 12144.157, "train_samples_per_second": 0.858, "train_steps_per_second": 0.043 } ], "logging_steps": 20, "max_steps": 522, "num_train_epochs": 3, "save_steps": 300, "total_flos": 2.6811251459044147e+17, "trial_name": null, "trial_params": null }