| { | |
| "best_metric": 0.7537589073181152, | |
| "best_model_checkpoint": "deberta_v3_finetuned_predicting_effective_arguments/checkpoint-2000", | |
| "epoch": 0.9667673716012085, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2e-07, | |
| "loss": 1.0361, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 3.98e-07, | |
| "loss": 0.9751, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 5.979999999999999e-07, | |
| "loss": 0.9807, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 7.94e-07, | |
| "loss": 0.9513, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 9.94e-07, | |
| "loss": 0.9237, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_loss": 0.9464540481567383, | |
| "eval_runtime": 37.5769, | |
| "eval_samples_per_second": 97.587, | |
| "eval_steps_per_second": 24.403, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 9.955915919781764e-07, | |
| "loss": 0.9367, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 9.81900265076038e-07, | |
| "loss": 0.8993, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 9.594490241150311e-07, | |
| "loss": 0.8586, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 9.282057505552949e-07, | |
| "loss": 0.854, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 8.889381125453379e-07, | |
| "loss": 0.8642, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_loss": 0.8270628452301025, | |
| "eval_runtime": 28.2214, | |
| "eval_samples_per_second": 129.937, | |
| "eval_steps_per_second": 32.493, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 8.423819662432867e-07, | |
| "loss": 0.8314, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.894097508558568e-07, | |
| "loss": 0.8317, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.310141395581585e-07, | |
| "loss": 0.7939, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 6.682894372882701e-07, | |
| "loss": 0.803, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 6.024110740127264e-07, | |
| "loss": 0.8029, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_loss": 0.7635419368743896, | |
| "eval_runtime": 28.2809, | |
| "eval_samples_per_second": 129.663, | |
| "eval_steps_per_second": 32.425, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 5.346135777490083e-07, | |
| "loss": 0.7959, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.6616744011972247e-07, | |
| "loss": 0.7708, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.9835530796656867e-07, | |
| "loss": 0.7636, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.3244794718149894e-07, | |
| "loss": 0.7864, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 2.696804291810131e-07, | |
| "loss": 0.8005, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_loss": 0.7537589073181152, | |
| "eval_runtime": 28.4034, | |
| "eval_samples_per_second": 129.104, | |
| "eval_steps_per_second": 32.285, | |
| "step": 2000 | |
| } | |
| ], | |
| "max_steps": 16544, | |
| "num_train_epochs": 8, | |
| "total_flos": 1828884830909760.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |