| { | |
| "best_metric": 0.024844545871019363, | |
| "best_model_checkpoint": "./results_pnum/results_cvrev_pnum_f0_large_b4e15_5000/checkpoint-10000", | |
| "epoch": 10.277492291880781, | |
| "global_step": 20000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.91485935519238e-05, | |
| "loss": 0.5437, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.829204782951314e-05, | |
| "loss": 0.0334, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.743550210710248e-05, | |
| "loss": 0.0315, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.657895638469182e-05, | |
| "loss": 0.0313, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.572241066228116e-05, | |
| "loss": 0.0295, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 4.48658649398705e-05, | |
| "loss": 0.0284, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 4.400931921745983e-05, | |
| "loss": 0.0283, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 4.315277349504917e-05, | |
| "loss": 0.0268, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 4.229622777263851e-05, | |
| "loss": 0.0267, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 4.143968205022784e-05, | |
| "loss": 0.0263, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "eval_loss": 0.02492157183587551, | |
| "eval_runtime": 103.3756, | |
| "eval_samples_per_second": 25.083, | |
| "eval_steps_per_second": 6.278, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 4.058313632781718e-05, | |
| "loss": 0.0262, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 3.972659060540652e-05, | |
| "loss": 0.0252, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 3.887004488299585e-05, | |
| "loss": 0.0247, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 3.801349916058519e-05, | |
| "loss": 0.0251, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 3.715695343817453e-05, | |
| "loss": 0.0244, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 3.630040771576387e-05, | |
| "loss": 0.0239, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 3.544386199335321e-05, | |
| "loss": 0.0233, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 3.458731627094255e-05, | |
| "loss": 0.0229, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 3.373077054853188e-05, | |
| "loss": 0.0232, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 3.287422482612122e-05, | |
| "loss": 0.0226, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "eval_loss": 0.024844545871019363, | |
| "eval_runtime": 103.4165, | |
| "eval_samples_per_second": 25.073, | |
| "eval_steps_per_second": 6.276, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 3.201767910371056e-05, | |
| "loss": 0.0216, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 3.11611333812999e-05, | |
| "loss": 0.0219, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 3.0304587658889232e-05, | |
| "loss": 0.0213, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 2.944804193647857e-05, | |
| "loss": 0.0203, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 2.8591496214067907e-05, | |
| "loss": 0.0204, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 2.7734950491657246e-05, | |
| "loss": 0.0205, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 2.6878404769246586e-05, | |
| "loss": 0.02, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "learning_rate": 2.602185904683592e-05, | |
| "loss": 0.0186, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 2.516531332442526e-05, | |
| "loss": 0.0181, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 2.4308767602014596e-05, | |
| "loss": 0.0181, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "eval_loss": 0.026644999161362648, | |
| "eval_runtime": 103.372, | |
| "eval_samples_per_second": 25.084, | |
| "eval_steps_per_second": 6.278, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "learning_rate": 2.3452221879603932e-05, | |
| "loss": 0.0187, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 2.259567615719327e-05, | |
| "loss": 0.0169, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 2.173913043478261e-05, | |
| "loss": 0.0168, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "learning_rate": 2.0882584712371946e-05, | |
| "loss": 0.0164, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "learning_rate": 2.0026038989961285e-05, | |
| "loss": 0.0167, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 1.9169493267550624e-05, | |
| "loss": 0.0152, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 9.51, | |
| "learning_rate": 1.831294754513996e-05, | |
| "loss": 0.0154, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "learning_rate": 1.74564018227293e-05, | |
| "loss": 0.0148, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 10.02, | |
| "learning_rate": 1.6599856100318635e-05, | |
| "loss": 0.0147, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "learning_rate": 1.5743310377907974e-05, | |
| "loss": 0.0137, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "eval_loss": 0.03126714006066322, | |
| "eval_runtime": 103.3509, | |
| "eval_samples_per_second": 25.089, | |
| "eval_steps_per_second": 6.28, | |
| "step": 20000 | |
| } | |
| ], | |
| "max_steps": 29190, | |
| "num_train_epochs": 15, | |
| "total_flos": 1.7320378368e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |