{ "best_global_step": 7670, "best_metric": 0.8058387637138367, "best_model_checkpoint": "bert_base_rand_5_v2_mnli/checkpoint-7670", "epoch": 10.0, "eval_steps": 500, "global_step": 15340, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.162489891052246, "learning_rate": 4.9e-05, "loss": 0.9841, "step": 1534 }, { "epoch": 1.0, "eval_accuracy": 0.5655629139072847, "eval_loss": 0.916408121585846, "eval_runtime": 6.3023, "eval_samples_per_second": 1557.363, "eval_steps_per_second": 6.188, "step": 1534 }, { "epoch": 2.0, "grad_norm": 1.748944878578186, "learning_rate": 4.8e-05, "loss": 0.8816, "step": 3068 }, { "epoch": 2.0, "eval_accuracy": 0.6031584309730005, "eval_loss": 0.863948404788971, "eval_runtime": 6.2592, "eval_samples_per_second": 1568.093, "eval_steps_per_second": 6.231, "step": 3068 }, { "epoch": 3.0, "grad_norm": 1.7005038261413574, "learning_rate": 4.7e-05, "loss": 0.802, "step": 4602 }, { "epoch": 3.0, "eval_accuracy": 0.6224146714212939, "eval_loss": 0.833983302116394, "eval_runtime": 6.3985, "eval_samples_per_second": 1533.944, "eval_steps_per_second": 6.095, "step": 4602 }, { "epoch": 4.0, "grad_norm": 2.3252110481262207, "learning_rate": 4.600000000000001e-05, "loss": 0.7236, "step": 6136 }, { "epoch": 4.0, "eval_accuracy": 0.6466632705043301, "eval_loss": 0.8122609257698059, "eval_runtime": 6.399, "eval_samples_per_second": 1533.828, "eval_steps_per_second": 6.095, "step": 6136 }, { "epoch": 5.0, "grad_norm": 2.2816672325134277, "learning_rate": 4.5e-05, "loss": 0.6505, "step": 7670 }, { "epoch": 5.0, "eval_accuracy": 0.6625573102394294, "eval_loss": 0.8058387637138367, "eval_runtime": 6.399, "eval_samples_per_second": 1533.842, "eval_steps_per_second": 6.095, "step": 7670 }, { "epoch": 6.0, "grad_norm": 2.778917074203491, "learning_rate": 4.4000000000000006e-05, "loss": 0.5786, "step": 9204 }, { "epoch": 6.0, "eval_accuracy": 0.6624554253693327, "eval_loss": 0.8320516347885132, "eval_runtime": 6.4081, "eval_samples_per_second": 1531.652, "eval_steps_per_second": 6.086, "step": 9204 }, { "epoch": 7.0, "grad_norm": 2.788456439971924, "learning_rate": 4.3e-05, "loss": 0.5046, "step": 10738 }, { "epoch": 7.0, "eval_accuracy": 0.6595007641365257, "eval_loss": 0.8853159546852112, "eval_runtime": 6.4161, "eval_samples_per_second": 1529.748, "eval_steps_per_second": 6.078, "step": 10738 }, { "epoch": 8.0, "grad_norm": 2.967282295227051, "learning_rate": 4.2e-05, "loss": 0.4363, "step": 12272 }, { "epoch": 8.0, "eval_accuracy": 0.658380030565461, "eval_loss": 0.9629728198051453, "eval_runtime": 6.4118, "eval_samples_per_second": 1530.769, "eval_steps_per_second": 6.083, "step": 12272 }, { "epoch": 9.0, "grad_norm": 4.872507572174072, "learning_rate": 4.1e-05, "loss": 0.3721, "step": 13806 }, { "epoch": 9.0, "eval_accuracy": 0.650127356087621, "eval_loss": 1.041638731956482, "eval_runtime": 6.2088, "eval_samples_per_second": 1580.814, "eval_steps_per_second": 6.281, "step": 13806 }, { "epoch": 10.0, "grad_norm": 4.8294453620910645, "learning_rate": 4e-05, "loss": 0.3164, "step": 15340 }, { "epoch": 10.0, "eval_accuracy": 0.6609271523178808, "eval_loss": 1.1638540029525757, "eval_runtime": 6.2489, "eval_samples_per_second": 1570.688, "eval_steps_per_second": 6.241, "step": 15340 }, { "epoch": 10.0, "step": 15340, "total_flos": 5.166258268431053e+17, "train_loss": 0.6249743345974321, "train_runtime": 6473.467, "train_samples_per_second": 3033.166, "train_steps_per_second": 11.848 } ], "logging_steps": 1, "max_steps": 76700, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.166258268431053e+17, "train_batch_size": 256, "trial_name": null, "trial_params": null }