{ "best_metric": 0.024844545871019363, "best_model_checkpoint": "./results_pnum/results_cvrev_pnum_f0_large_b4e15_5000/checkpoint-10000", "epoch": 10.277492291880781, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.26, "learning_rate": 4.91485935519238e-05, "loss": 0.5437, "step": 500 }, { "epoch": 0.51, "learning_rate": 4.829204782951314e-05, "loss": 0.0334, "step": 1000 }, { "epoch": 0.77, "learning_rate": 4.743550210710248e-05, "loss": 0.0315, "step": 1500 }, { "epoch": 1.03, "learning_rate": 4.657895638469182e-05, "loss": 0.0313, "step": 2000 }, { "epoch": 1.28, "learning_rate": 4.572241066228116e-05, "loss": 0.0295, "step": 2500 }, { "epoch": 1.54, "learning_rate": 4.48658649398705e-05, "loss": 0.0284, "step": 3000 }, { "epoch": 1.8, "learning_rate": 4.400931921745983e-05, "loss": 0.0283, "step": 3500 }, { "epoch": 2.06, "learning_rate": 4.315277349504917e-05, "loss": 0.0268, "step": 4000 }, { "epoch": 2.31, "learning_rate": 4.229622777263851e-05, "loss": 0.0267, "step": 4500 }, { "epoch": 2.57, "learning_rate": 4.143968205022784e-05, "loss": 0.0263, "step": 5000 }, { "epoch": 2.57, "eval_loss": 0.02492157183587551, "eval_runtime": 103.3756, "eval_samples_per_second": 25.083, "eval_steps_per_second": 6.278, "step": 5000 }, { "epoch": 2.83, "learning_rate": 4.058313632781718e-05, "loss": 0.0262, "step": 5500 }, { "epoch": 3.08, "learning_rate": 3.972659060540652e-05, "loss": 0.0252, "step": 6000 }, { "epoch": 3.34, "learning_rate": 3.887004488299585e-05, "loss": 0.0247, "step": 6500 }, { "epoch": 3.6, "learning_rate": 3.801349916058519e-05, "loss": 0.0251, "step": 7000 }, { "epoch": 3.85, "learning_rate": 3.715695343817453e-05, "loss": 0.0244, "step": 7500 }, { "epoch": 4.11, "learning_rate": 3.630040771576387e-05, "loss": 0.0239, "step": 8000 }, { "epoch": 4.37, "learning_rate": 3.544386199335321e-05, "loss": 0.0233, "step": 8500 }, { "epoch": 4.62, "learning_rate": 3.458731627094255e-05, "loss": 0.0229, "step": 9000 }, { "epoch": 4.88, "learning_rate": 3.373077054853188e-05, "loss": 0.0232, "step": 9500 }, { "epoch": 5.14, "learning_rate": 3.287422482612122e-05, "loss": 0.0226, "step": 10000 }, { "epoch": 5.14, "eval_loss": 0.024844545871019363, "eval_runtime": 103.4165, "eval_samples_per_second": 25.073, "eval_steps_per_second": 6.276, "step": 10000 }, { "epoch": 5.4, "learning_rate": 3.201767910371056e-05, "loss": 0.0216, "step": 10500 }, { "epoch": 5.65, "learning_rate": 3.11611333812999e-05, "loss": 0.0219, "step": 11000 }, { "epoch": 5.91, "learning_rate": 3.0304587658889232e-05, "loss": 0.0213, "step": 11500 }, { "epoch": 6.17, "learning_rate": 2.944804193647857e-05, "loss": 0.0203, "step": 12000 }, { "epoch": 6.42, "learning_rate": 2.8591496214067907e-05, "loss": 0.0204, "step": 12500 }, { "epoch": 6.68, "learning_rate": 2.7734950491657246e-05, "loss": 0.0205, "step": 13000 }, { "epoch": 6.94, "learning_rate": 2.6878404769246586e-05, "loss": 0.02, "step": 13500 }, { "epoch": 7.19, "learning_rate": 2.602185904683592e-05, "loss": 0.0186, "step": 14000 }, { "epoch": 7.45, "learning_rate": 2.516531332442526e-05, "loss": 0.0181, "step": 14500 }, { "epoch": 7.71, "learning_rate": 2.4308767602014596e-05, "loss": 0.0181, "step": 15000 }, { "epoch": 7.71, "eval_loss": 0.026644999161362648, "eval_runtime": 103.372, "eval_samples_per_second": 25.084, "eval_steps_per_second": 6.278, "step": 15000 }, { "epoch": 7.97, "learning_rate": 2.3452221879603932e-05, "loss": 0.0187, "step": 15500 }, { "epoch": 8.22, "learning_rate": 2.259567615719327e-05, "loss": 0.0169, "step": 16000 }, { "epoch": 8.48, "learning_rate": 2.173913043478261e-05, "loss": 0.0168, "step": 16500 }, { "epoch": 8.74, "learning_rate": 2.0882584712371946e-05, "loss": 0.0164, "step": 17000 }, { "epoch": 8.99, "learning_rate": 2.0026038989961285e-05, "loss": 0.0167, "step": 17500 }, { "epoch": 9.25, "learning_rate": 1.9169493267550624e-05, "loss": 0.0152, "step": 18000 }, { "epoch": 9.51, "learning_rate": 1.831294754513996e-05, "loss": 0.0154, "step": 18500 }, { "epoch": 9.76, "learning_rate": 1.74564018227293e-05, "loss": 0.0148, "step": 19000 }, { "epoch": 10.02, "learning_rate": 1.6599856100318635e-05, "loss": 0.0147, "step": 19500 }, { "epoch": 10.28, "learning_rate": 1.5743310377907974e-05, "loss": 0.0137, "step": 20000 }, { "epoch": 10.28, "eval_loss": 0.03126714006066322, "eval_runtime": 103.3509, "eval_samples_per_second": 25.089, "eval_steps_per_second": 6.28, "step": 20000 } ], "max_steps": 29190, "num_train_epochs": 15, "total_flos": 1.7320378368e+17, "trial_name": null, "trial_params": null }