| { | |
| "best_metric": 0.5166051660516605, | |
| "best_model_checkpoint": "RuleBert-v0.2-k0/checkpoint-1500", | |
| "epoch": 0.49382716049382713, | |
| "eval_steps": 250, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.937500000000001e-06, | |
| "loss": 0.5636, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 9.875000000000001e-06, | |
| "loss": 0.3677, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.8125e-06, | |
| "loss": 0.3585, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.75e-06, | |
| "loss": 0.3372, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.6875e-06, | |
| "loss": 0.3226, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_accuracy": 0.06666666666666667, | |
| "eval_f1": 0.5180722891566265, | |
| "eval_loss": 0.36232367157936096, | |
| "eval_roc_auc": 0.6758306628436499, | |
| "eval_runtime": 1.6645, | |
| "eval_samples_per_second": 90.118, | |
| "eval_steps_per_second": 1.802, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.625e-06, | |
| "loss": 0.3323, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.562500000000002e-06, | |
| "loss": 0.3451, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 9.5e-06, | |
| "loss": 0.3317, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 9.4375e-06, | |
| "loss": 0.3297, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.375000000000001e-06, | |
| "loss": 0.3287, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.0, | |
| "eval_f1": 0.49723756906077354, | |
| "eval_loss": 0.3665144741535187, | |
| "eval_roc_auc": 0.6720357564513407, | |
| "eval_runtime": 1.6779, | |
| "eval_samples_per_second": 89.396, | |
| "eval_steps_per_second": 1.788, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.312500000000001e-06, | |
| "loss": 0.3359, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 9.25125e-06, | |
| "loss": 0.3331, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 9.18875e-06, | |
| "loss": 0.3374, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 9.126250000000001e-06, | |
| "loss": 0.3367, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 9.065e-06, | |
| "loss": 0.3195, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.04, | |
| "eval_f1": 0.5126993502658004, | |
| "eval_loss": 0.3585328757762909, | |
| "eval_roc_auc": 0.6747933884297521, | |
| "eval_runtime": 1.6851, | |
| "eval_samples_per_second": 89.013, | |
| "eval_steps_per_second": 1.78, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 9.0025e-06, | |
| "loss": 0.3226, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 8.94e-06, | |
| "loss": 0.3435, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 8.87875e-06, | |
| "loss": 0.3213, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 8.817500000000002e-06, | |
| "loss": 0.3313, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 8.755e-06, | |
| "loss": 0.3241, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.03333333333333333, | |
| "eval_f1": 0.5134189031505251, | |
| "eval_loss": 0.3658309876918793, | |
| "eval_roc_auc": 0.6758812615955473, | |
| "eval_runtime": 1.6782, | |
| "eval_samples_per_second": 89.383, | |
| "eval_steps_per_second": 1.788, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 8.6925e-06, | |
| "loss": 0.3257, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 8.63e-06, | |
| "loss": 0.3288, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 8.5675e-06, | |
| "loss": 0.3331, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 8.505e-06, | |
| "loss": 0.3529, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 8.442500000000001e-06, | |
| "loss": 0.3321, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_accuracy": 0.0, | |
| "eval_f1": 0.49723756906077354, | |
| "eval_loss": 0.3737262487411499, | |
| "eval_roc_auc": 0.6720357564513407, | |
| "eval_runtime": 1.69, | |
| "eval_samples_per_second": 88.759, | |
| "eval_steps_per_second": 1.775, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 8.380000000000001e-06, | |
| "loss": 0.3339, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 8.317500000000001e-06, | |
| "loss": 0.3153, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 8.255000000000001e-06, | |
| "loss": 0.3406, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 8.192500000000002e-06, | |
| "loss": 0.3109, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 8.13e-06, | |
| "loss": 0.3315, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.02666666666666667, | |
| "eval_f1": 0.5166051660516605, | |
| "eval_loss": 0.3748493194580078, | |
| "eval_roc_auc": 0.6842047562826783, | |
| "eval_runtime": 1.6814, | |
| "eval_samples_per_second": 89.211, | |
| "eval_steps_per_second": 1.784, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 8.0675e-06, | |
| "loss": 0.3066, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 8.005e-06, | |
| "loss": 0.3143, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 7.9425e-06, | |
| "loss": 0.3286, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 7.88e-06, | |
| "loss": 0.3125, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 7.8175e-06, | |
| "loss": 0.3384, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.02666666666666667, | |
| "eval_f1": 0.5051546391752577, | |
| "eval_loss": 0.367872029542923, | |
| "eval_roc_auc": 0.6734187890032046, | |
| "eval_runtime": 1.6925, | |
| "eval_samples_per_second": 88.625, | |
| "eval_steps_per_second": 1.772, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 7.755000000000001e-06, | |
| "loss": 0.3061, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 7.692500000000001e-06, | |
| "loss": 0.32, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 7.630000000000001e-06, | |
| "loss": 0.3211, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 7.567500000000001e-06, | |
| "loss": 0.3104, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 7.505e-06, | |
| "loss": 0.3338, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.02, | |
| "eval_f1": 0.5050732807215332, | |
| "eval_loss": 0.3726758658885956, | |
| "eval_roc_auc": 0.6743632990386238, | |
| "eval_runtime": 1.6832, | |
| "eval_samples_per_second": 89.116, | |
| "eval_steps_per_second": 1.782, | |
| "step": 2000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 8000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "total_flos": 1052841099264000.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |