{ "best_metric": 0.02430173009634018, "best_model_checkpoint": "./results_pnum/results_cvrev_pnum_f1_large_b4e15_5000/checkpoint-10000", "epoch": 10.277492291880781, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.26, "learning_rate": 4.91485935519238e-05, "loss": 0.5284, "step": 500 }, { "epoch": 0.51, "learning_rate": 4.829204782951314e-05, "loss": 0.0338, "step": 1000 }, { "epoch": 0.77, "learning_rate": 4.743550210710248e-05, "loss": 0.0311, "step": 1500 }, { "epoch": 1.03, "learning_rate": 4.657895638469182e-05, "loss": 0.0299, "step": 2000 }, { "epoch": 1.28, "learning_rate": 4.572241066228116e-05, "loss": 0.029, "step": 2500 }, { "epoch": 1.54, "learning_rate": 4.48658649398705e-05, "loss": 0.0277, "step": 3000 }, { "epoch": 1.8, "learning_rate": 4.400931921745983e-05, "loss": 0.0278, "step": 3500 }, { "epoch": 2.06, "learning_rate": 4.315277349504917e-05, "loss": 0.0273, "step": 4000 }, { "epoch": 2.31, "learning_rate": 4.229622777263851e-05, "loss": 0.0267, "step": 4500 }, { "epoch": 2.57, "learning_rate": 4.143968205022784e-05, "loss": 0.026, "step": 5000 }, { "epoch": 2.57, "eval_loss": 0.024562297388911247, "eval_runtime": 103.14, "eval_samples_per_second": 25.189, "eval_steps_per_second": 6.302, "step": 5000 }, { "epoch": 2.83, "learning_rate": 4.058313632781718e-05, "loss": 0.0275, "step": 5500 }, { "epoch": 3.08, "learning_rate": 3.972659060540652e-05, "loss": 0.0301, "step": 6000 }, { "epoch": 3.34, "learning_rate": 3.887004488299585e-05, "loss": 0.0273, "step": 6500 }, { "epoch": 3.6, "learning_rate": 3.801349916058519e-05, "loss": 0.0279, "step": 7000 }, { "epoch": 3.85, "learning_rate": 3.715695343817453e-05, "loss": 0.0268, "step": 7500 }, { "epoch": 4.11, "learning_rate": 3.630040771576387e-05, "loss": 0.0261, "step": 8000 }, { "epoch": 4.37, "learning_rate": 3.544386199335321e-05, "loss": 0.0255, "step": 8500 }, { "epoch": 4.62, "learning_rate": 3.458731627094255e-05, "loss": 0.0245, "step": 9000 }, { "epoch": 4.88, "learning_rate": 3.373077054853188e-05, "loss": 0.0241, "step": 9500 }, { "epoch": 5.14, "learning_rate": 3.287422482612122e-05, "loss": 0.0235, "step": 10000 }, { "epoch": 5.14, "eval_loss": 0.02430173009634018, "eval_runtime": 102.9965, "eval_samples_per_second": 25.224, "eval_steps_per_second": 6.311, "step": 10000 }, { "epoch": 5.4, "learning_rate": 3.201767910371056e-05, "loss": 0.0232, "step": 10500 }, { "epoch": 5.65, "learning_rate": 3.11611333812999e-05, "loss": 0.0238, "step": 11000 }, { "epoch": 5.91, "learning_rate": 3.0304587658889232e-05, "loss": 0.0233, "step": 11500 }, { "epoch": 6.17, "learning_rate": 2.944804193647857e-05, "loss": 0.0223, "step": 12000 }, { "epoch": 6.42, "learning_rate": 2.8591496214067907e-05, "loss": 0.0217, "step": 12500 }, { "epoch": 6.68, "learning_rate": 2.7734950491657246e-05, "loss": 0.0218, "step": 13000 }, { "epoch": 6.94, "learning_rate": 2.6878404769246586e-05, "loss": 0.0218, "step": 13500 }, { "epoch": 7.19, "learning_rate": 2.602185904683592e-05, "loss": 0.0211, "step": 14000 }, { "epoch": 7.45, "learning_rate": 2.516531332442526e-05, "loss": 0.0207, "step": 14500 }, { "epoch": 7.71, "learning_rate": 2.4308767602014596e-05, "loss": 0.0202, "step": 15000 }, { "epoch": 7.71, "eval_loss": 0.024446425959467888, "eval_runtime": 102.9742, "eval_samples_per_second": 25.23, "eval_steps_per_second": 6.312, "step": 15000 }, { "epoch": 7.97, "learning_rate": 2.3452221879603932e-05, "loss": 0.0202, "step": 15500 }, { "epoch": 8.22, "learning_rate": 2.259567615719327e-05, "loss": 0.0193, "step": 16000 }, { "epoch": 8.48, "learning_rate": 2.173913043478261e-05, "loss": 0.0188, "step": 16500 }, { "epoch": 8.74, "learning_rate": 2.0882584712371946e-05, "loss": 0.0189, "step": 17000 }, { "epoch": 8.99, "learning_rate": 2.0026038989961285e-05, "loss": 0.0187, "step": 17500 }, { "epoch": 9.25, "learning_rate": 1.9169493267550624e-05, "loss": 0.0173, "step": 18000 }, { "epoch": 9.51, "learning_rate": 1.831294754513996e-05, "loss": 0.0172, "step": 18500 }, { "epoch": 9.76, "learning_rate": 1.74564018227293e-05, "loss": 0.017, "step": 19000 }, { "epoch": 10.02, "learning_rate": 1.6599856100318635e-05, "loss": 0.0168, "step": 19500 }, { "epoch": 10.28, "learning_rate": 1.5743310377907974e-05, "loss": 0.016, "step": 20000 }, { "epoch": 10.28, "eval_loss": 0.028702957555651665, "eval_runtime": 103.0001, "eval_samples_per_second": 25.223, "eval_steps_per_second": 6.311, "step": 20000 } ], "max_steps": 29190, "num_train_epochs": 15, "total_flos": 1.7316048273408e+17, "trial_name": null, "trial_params": null }