{ "best_metric": 0.64125, "best_model_checkpoint": "./results/checkpoint-5000", "epoch": 25.0, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.9333333333333333e-05, "loss": 1.0006, "step": 200 }, { "epoch": 1.25, "eval_accuracy": 0.58125, "eval_loss": 0.8775196075439453, "eval_runtime": 16.4637, "eval_samples_per_second": 48.592, "eval_steps_per_second": 3.037, "step": 250 }, { "epoch": 2.0, "learning_rate": 1.866666666666667e-05, "loss": 0.8654, "step": 400 }, { "epoch": 2.5, "eval_accuracy": 0.6275, "eval_loss": 0.8210939168930054, "eval_runtime": 15.3938, "eval_samples_per_second": 51.969, "eval_steps_per_second": 3.248, "step": 500 }, { "epoch": 3.0, "learning_rate": 1.8e-05, "loss": 0.7041, "step": 600 }, { "epoch": 3.75, "eval_accuracy": 0.62125, "eval_loss": 0.9076805710792542, "eval_runtime": 15.4386, "eval_samples_per_second": 51.818, "eval_steps_per_second": 3.239, "step": 750 }, { "epoch": 4.0, "learning_rate": 1.7333333333333336e-05, "loss": 0.5119, "step": 800 }, { "epoch": 5.0, "learning_rate": 1.6666666666666667e-05, "loss": 0.3314, "step": 1000 }, { "epoch": 5.0, "eval_accuracy": 0.63125, "eval_loss": 1.0315085649490356, "eval_runtime": 15.309, "eval_samples_per_second": 52.257, "eval_steps_per_second": 3.266, "step": 1000 }, { "epoch": 6.0, "learning_rate": 1.6000000000000003e-05, "loss": 0.2029, "step": 1200 }, { "epoch": 6.25, "eval_accuracy": 0.64, "eval_loss": 1.349707841873169, "eval_runtime": 15.3657, "eval_samples_per_second": 52.064, "eval_steps_per_second": 3.254, "step": 1250 }, { "epoch": 7.0, "learning_rate": 1.5333333333333334e-05, "loss": 0.1332, "step": 1400 }, { "epoch": 7.5, "eval_accuracy": 0.56375, "eval_loss": 2.2334282398223877, "eval_runtime": 15.4014, "eval_samples_per_second": 51.943, "eval_steps_per_second": 3.246, "step": 1500 }, { "epoch": 8.0, "learning_rate": 1.4666666666666666e-05, "loss": 0.0748, "step": 1600 }, { "epoch": 8.75, "eval_accuracy": 0.605, "eval_loss": 2.0466909408569336, "eval_runtime": 15.38, "eval_samples_per_second": 52.016, "eval_steps_per_second": 3.251, "step": 1750 }, { "epoch": 9.0, "learning_rate": 1.4e-05, "loss": 0.0476, "step": 1800 }, { "epoch": 10.0, "learning_rate": 1.3333333333333333e-05, "loss": 0.0282, "step": 2000 }, { "epoch": 10.0, "eval_accuracy": 0.6175, "eval_loss": 2.3013718128204346, "eval_runtime": 15.3412, "eval_samples_per_second": 52.147, "eval_steps_per_second": 3.259, "step": 2000 }, { "epoch": 11.0, "learning_rate": 1.2666666666666667e-05, "loss": 0.038, "step": 2200 }, { "epoch": 11.25, "eval_accuracy": 0.63, "eval_loss": 2.488283157348633, "eval_runtime": 15.3496, "eval_samples_per_second": 52.119, "eval_steps_per_second": 3.257, "step": 2250 }, { "epoch": 12.0, "learning_rate": 1.2e-05, "loss": 0.0218, "step": 2400 }, { "epoch": 12.5, "eval_accuracy": 0.62875, "eval_loss": 2.5122828483581543, "eval_runtime": 15.4178, "eval_samples_per_second": 51.888, "eval_steps_per_second": 3.243, "step": 2500 }, { "epoch": 13.0, "learning_rate": 1.1333333333333334e-05, "loss": 0.0174, "step": 2600 }, { "epoch": 13.75, "eval_accuracy": 0.62125, "eval_loss": 2.5520098209381104, "eval_runtime": 15.4539, "eval_samples_per_second": 51.767, "eval_steps_per_second": 3.235, "step": 2750 }, { "epoch": 14.0, "learning_rate": 1.0666666666666667e-05, "loss": 0.0114, "step": 2800 }, { "epoch": 15.0, "learning_rate": 1e-05, "loss": 0.0134, "step": 3000 }, { "epoch": 15.0, "eval_accuracy": 0.61, "eval_loss": 2.702610969543457, "eval_runtime": 15.3837, "eval_samples_per_second": 52.003, "eval_steps_per_second": 3.25, "step": 3000 }, { "epoch": 16.0, "learning_rate": 9.333333333333334e-06, "loss": 0.0107, "step": 3200 }, { "epoch": 16.25, "eval_accuracy": 0.605, "eval_loss": 2.7754039764404297, "eval_runtime": 15.3762, "eval_samples_per_second": 52.029, "eval_steps_per_second": 3.252, "step": 3250 }, { "epoch": 17.0, "learning_rate": 8.666666666666668e-06, "loss": 0.0088, "step": 3400 }, { "epoch": 17.5, "eval_accuracy": 0.59875, "eval_loss": 3.0348358154296875, "eval_runtime": 15.3746, "eval_samples_per_second": 52.034, "eval_steps_per_second": 3.252, "step": 3500 }, { "epoch": 18.0, "learning_rate": 8.000000000000001e-06, "loss": 0.0084, "step": 3600 }, { "epoch": 18.75, "eval_accuracy": 0.6375, "eval_loss": 2.828749656677246, "eval_runtime": 15.4159, "eval_samples_per_second": 51.894, "eval_steps_per_second": 3.243, "step": 3750 }, { "epoch": 19.0, "learning_rate": 7.333333333333333e-06, "loss": 0.0085, "step": 3800 }, { "epoch": 20.0, "learning_rate": 6.666666666666667e-06, "loss": 0.0044, "step": 4000 }, { "epoch": 20.0, "eval_accuracy": 0.62875, "eval_loss": 2.9390206336975098, "eval_runtime": 15.4137, "eval_samples_per_second": 51.902, "eval_steps_per_second": 3.244, "step": 4000 }, { "epoch": 21.0, "learning_rate": 6e-06, "loss": 0.0093, "step": 4200 }, { "epoch": 21.25, "eval_accuracy": 0.60375, "eval_loss": 3.133685350418091, "eval_runtime": 15.4418, "eval_samples_per_second": 51.808, "eval_steps_per_second": 3.238, "step": 4250 }, { "epoch": 22.0, "learning_rate": 5.333333333333334e-06, "loss": 0.0071, "step": 4400 }, { "epoch": 22.5, "eval_accuracy": 0.6175, "eval_loss": 2.979722261428833, "eval_runtime": 15.4954, "eval_samples_per_second": 51.628, "eval_steps_per_second": 3.227, "step": 4500 }, { "epoch": 23.0, "learning_rate": 4.666666666666667e-06, "loss": 0.0087, "step": 4600 }, { "epoch": 23.75, "eval_accuracy": 0.61625, "eval_loss": 3.0937910079956055, "eval_runtime": 15.473, "eval_samples_per_second": 51.703, "eval_steps_per_second": 3.231, "step": 4750 }, { "epoch": 24.0, "learning_rate": 4.000000000000001e-06, "loss": 0.0048, "step": 4800 }, { "epoch": 25.0, "learning_rate": 3.3333333333333333e-06, "loss": 0.0033, "step": 5000 }, { "epoch": 25.0, "eval_accuracy": 0.64125, "eval_loss": 2.959096908569336, "eval_runtime": 15.3539, "eval_samples_per_second": 52.104, "eval_steps_per_second": 3.257, "step": 5000 } ], "max_steps": 6000, "num_train_epochs": 30, "total_flos": 1.059758088192e+16, "trial_name": null, "trial_params": null }