| { | |
| "best_metric": 0.02430173009634018, | |
| "best_model_checkpoint": "./results_pnum/results_cvrev_pnum_f1_large_b4e15_5000/checkpoint-10000", | |
| "epoch": 10.277492291880781, | |
| "global_step": 20000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.91485935519238e-05, | |
| "loss": 0.5284, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.829204782951314e-05, | |
| "loss": 0.0338, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.743550210710248e-05, | |
| "loss": 0.0311, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.657895638469182e-05, | |
| "loss": 0.0299, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.572241066228116e-05, | |
| "loss": 0.029, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 4.48658649398705e-05, | |
| "loss": 0.0277, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 4.400931921745983e-05, | |
| "loss": 0.0278, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 4.315277349504917e-05, | |
| "loss": 0.0273, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 4.229622777263851e-05, | |
| "loss": 0.0267, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 4.143968205022784e-05, | |
| "loss": 0.026, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "eval_loss": 0.024562297388911247, | |
| "eval_runtime": 103.14, | |
| "eval_samples_per_second": 25.189, | |
| "eval_steps_per_second": 6.302, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 4.058313632781718e-05, | |
| "loss": 0.0275, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 3.972659060540652e-05, | |
| "loss": 0.0301, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 3.887004488299585e-05, | |
| "loss": 0.0273, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 3.801349916058519e-05, | |
| "loss": 0.0279, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 3.715695343817453e-05, | |
| "loss": 0.0268, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 3.630040771576387e-05, | |
| "loss": 0.0261, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 3.544386199335321e-05, | |
| "loss": 0.0255, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 3.458731627094255e-05, | |
| "loss": 0.0245, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 3.373077054853188e-05, | |
| "loss": 0.0241, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 3.287422482612122e-05, | |
| "loss": 0.0235, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "eval_loss": 0.02430173009634018, | |
| "eval_runtime": 102.9965, | |
| "eval_samples_per_second": 25.224, | |
| "eval_steps_per_second": 6.311, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 3.201767910371056e-05, | |
| "loss": 0.0232, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 3.11611333812999e-05, | |
| "loss": 0.0238, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 3.0304587658889232e-05, | |
| "loss": 0.0233, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 2.944804193647857e-05, | |
| "loss": 0.0223, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 2.8591496214067907e-05, | |
| "loss": 0.0217, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 2.7734950491657246e-05, | |
| "loss": 0.0218, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 2.6878404769246586e-05, | |
| "loss": 0.0218, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "learning_rate": 2.602185904683592e-05, | |
| "loss": 0.0211, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 2.516531332442526e-05, | |
| "loss": 0.0207, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 2.4308767602014596e-05, | |
| "loss": 0.0202, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "eval_loss": 0.024446425959467888, | |
| "eval_runtime": 102.9742, | |
| "eval_samples_per_second": 25.23, | |
| "eval_steps_per_second": 6.312, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "learning_rate": 2.3452221879603932e-05, | |
| "loss": 0.0202, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 2.259567615719327e-05, | |
| "loss": 0.0193, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 2.173913043478261e-05, | |
| "loss": 0.0188, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "learning_rate": 2.0882584712371946e-05, | |
| "loss": 0.0189, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "learning_rate": 2.0026038989961285e-05, | |
| "loss": 0.0187, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 1.9169493267550624e-05, | |
| "loss": 0.0173, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 9.51, | |
| "learning_rate": 1.831294754513996e-05, | |
| "loss": 0.0172, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "learning_rate": 1.74564018227293e-05, | |
| "loss": 0.017, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 10.02, | |
| "learning_rate": 1.6599856100318635e-05, | |
| "loss": 0.0168, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "learning_rate": 1.5743310377907974e-05, | |
| "loss": 0.016, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "eval_loss": 0.028702957555651665, | |
| "eval_runtime": 103.0001, | |
| "eval_samples_per_second": 25.223, | |
| "eval_steps_per_second": 6.311, | |
| "step": 20000 | |
| } | |
| ], | |
| "max_steps": 29190, | |
| "num_train_epochs": 15, | |
| "total_flos": 1.7316048273408e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |