{ "best_global_step": 102, "best_metric": 0.613166332244873, "best_model_checkpoint": "distilbert_rand_100_v2_cola/checkpoint-102", "epoch": 8.0, "eval_steps": 500, "global_step": 272, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.0599257946014404, "learning_rate": 4.9e-05, "loss": 0.6127, "step": 34 }, { "epoch": 1.0, "eval_accuracy": 0.6912751793861389, "eval_loss": 0.61478590965271, "eval_matthews_correlation": 0.0, "eval_runtime": 0.4384, "eval_samples_per_second": 2379.274, "eval_steps_per_second": 11.406, "step": 34 }, { "epoch": 2.0, "grad_norm": 1.9703173637390137, "learning_rate": 4.8e-05, "loss": 0.591, "step": 68 }, { "epoch": 2.0, "eval_accuracy": 0.6883988380432129, "eval_loss": 0.6217056512832642, "eval_matthews_correlation": -0.016334423518002312, "eval_runtime": 0.4757, "eval_samples_per_second": 2192.413, "eval_steps_per_second": 10.51, "step": 68 }, { "epoch": 3.0, "grad_norm": 2.5333664417266846, "learning_rate": 4.7e-05, "loss": 0.5421, "step": 102 }, { "epoch": 3.0, "eval_accuracy": 0.6826462149620056, "eval_loss": 0.613166332244873, "eval_matthews_correlation": 0.07482349006947582, "eval_runtime": 0.4492, "eval_samples_per_second": 2321.947, "eval_steps_per_second": 11.131, "step": 102 }, { "epoch": 4.0, "grad_norm": 4.39423942565918, "learning_rate": 4.600000000000001e-05, "loss": 0.4864, "step": 136 }, { "epoch": 4.0, "eval_accuracy": 0.6596356630325317, "eval_loss": 0.7308068871498108, "eval_matthews_correlation": 0.1075004609011948, "eval_runtime": 0.4584, "eval_samples_per_second": 2275.494, "eval_steps_per_second": 10.908, "step": 136 }, { "epoch": 5.0, "grad_norm": 2.305387020111084, "learning_rate": 4.5e-05, "loss": 0.4232, "step": 170 }, { "epoch": 5.0, "eval_accuracy": 0.6577181220054626, "eval_loss": 0.752271831035614, "eval_matthews_correlation": 0.1393279362508076, "eval_runtime": 0.4369, "eval_samples_per_second": 2387.23, "eval_steps_per_second": 11.444, "step": 170 }, { "epoch": 6.0, "grad_norm": 2.593329906463623, "learning_rate": 4.4000000000000006e-05, "loss": 0.3623, "step": 204 }, { "epoch": 6.0, "eval_accuracy": 0.6500479578971863, "eval_loss": 0.8274661898612976, "eval_matthews_correlation": 0.11024544231549166, "eval_runtime": 0.5541, "eval_samples_per_second": 1882.371, "eval_steps_per_second": 9.024, "step": 204 }, { "epoch": 7.0, "grad_norm": 3.920320987701416, "learning_rate": 4.3e-05, "loss": 0.3196, "step": 238 }, { "epoch": 7.0, "eval_accuracy": 0.6327900290489197, "eval_loss": 0.9465068578720093, "eval_matthews_correlation": 0.10253787164332456, "eval_runtime": 0.4519, "eval_samples_per_second": 2307.818, "eval_steps_per_second": 11.063, "step": 238 }, { "epoch": 8.0, "grad_norm": 3.7900238037109375, "learning_rate": 4.2e-05, "loss": 0.2848, "step": 272 }, { "epoch": 8.0, "eval_accuracy": 0.6481304168701172, "eval_loss": 1.0343079566955566, "eval_matthews_correlation": 0.1313945052317705, "eval_runtime": 0.4753, "eval_samples_per_second": 2194.285, "eval_steps_per_second": 10.519, "step": 272 }, { "epoch": 8.0, "step": 272, "total_flos": 4530914903629824.0, "train_loss": 0.4527585190885207, "train_runtime": 77.9805, "train_samples_per_second": 5482.781, "train_steps_per_second": 21.8 } ], "logging_steps": 1, "max_steps": 1700, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4530914903629824.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }