{ "best_metric": 0.0059661865234375, "best_model_checkpoint": "model_fewrel_1_4-task5/checkpoint-630", "epoch": 10.0, "eval_steps": 500, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 0.017303466796875, "eval_rouge1": 98.4539, "eval_rouge2": 97.5762, "eval_rougeL": 98.1882, "eval_rougeLsum": 98.4482, "eval_runtime": 34.4843, "eval_samples_per_second": 32.479, "eval_steps_per_second": 1.015, "step": 210 }, { "epoch": 2.0, "eval_loss": 0.00751495361328125, "eval_rouge1": 99.1583, "eval_rouge2": 98.6851, "eval_rougeL": 99.0047, "eval_rougeLsum": 99.1647, "eval_runtime": 32.741, "eval_samples_per_second": 34.208, "eval_steps_per_second": 1.069, "step": 420 }, { "epoch": 2.380952380952381, "grad_norm": 0.014579183422029018, "learning_rate": 0.0008665259359149131, "loss": 0.0573, "step": 500 }, { "epoch": 3.0, "eval_loss": 0.0059661865234375, "eval_rouge1": 99.3646, "eval_rouge2": 98.9205, "eval_rougeL": 99.2221, "eval_rougeLsum": 99.3603, "eval_runtime": 32.9254, "eval_samples_per_second": 34.016, "eval_steps_per_second": 1.063, "step": 630 }, { "epoch": 4.0, "eval_loss": 0.00925445556640625, "eval_rouge1": 99.1633, "eval_rouge2": 98.6891, "eval_rougeL": 99.0235, "eval_rougeLsum": 99.1601, "eval_runtime": 33.2811, "eval_samples_per_second": 33.653, "eval_steps_per_second": 1.052, "step": 840 }, { "epoch": 4.761904761904762, "grad_norm": 0.1038060188293457, "learning_rate": 0.0005373650467932121, "loss": 0.0102, "step": 1000 }, { "epoch": 5.0, "eval_loss": 0.007312774658203125, "eval_rouge1": 99.3523, "eval_rouge2": 98.8747, "eval_rougeL": 99.1909, "eval_rougeLsum": 99.3521, "eval_runtime": 33.1107, "eval_samples_per_second": 33.826, "eval_steps_per_second": 1.057, "step": 1050 }, { "epoch": 6.0, "eval_loss": 0.008575439453125, "eval_rouge1": 99.4615, "eval_rouge2": 99.0736, "eval_rougeL": 99.3351, "eval_rougeLsum": 99.4494, "eval_runtime": 32.8562, "eval_samples_per_second": 34.088, "eval_steps_per_second": 1.065, "step": 1260 }, { "epoch": 7.0, "eval_loss": 0.00952911376953125, "eval_rouge1": 99.3799, "eval_rouge2": 99.0097, "eval_rougeL": 99.2562, "eval_rougeLsum": 99.371, "eval_runtime": 33.1813, "eval_samples_per_second": 33.754, "eval_steps_per_second": 1.055, "step": 1470 }, { "epoch": 7.142857142857143, "grad_norm": 0.025980567559599876, "learning_rate": 0.00018825509907063325, "loss": 0.0042, "step": 1500 }, { "epoch": 8.0, "eval_loss": 0.00867462158203125, "eval_rouge1": 99.4488, "eval_rouge2": 99.0827, "eval_rougeL": 99.3346, "eval_rougeLsum": 99.4596, "eval_runtime": 32.7477, "eval_samples_per_second": 34.201, "eval_steps_per_second": 1.069, "step": 1680 }, { "epoch": 9.0, "eval_loss": 0.00786590576171875, "eval_rouge1": 99.3538, "eval_rouge2": 98.9422, "eval_rougeL": 99.2192, "eval_rougeLsum": 99.3563, "eval_runtime": 33.0744, "eval_samples_per_second": 33.863, "eval_steps_per_second": 1.058, "step": 1890 }, { "epoch": 9.523809523809524, "grad_norm": 0.18442556262016296, "learning_rate": 5.5845868874357386e-06, "loss": 0.0023, "step": 2000 }, { "epoch": 10.0, "eval_loss": 0.007965087890625, "eval_rouge1": 99.3538, "eval_rouge2": 98.9422, "eval_rougeL": 99.2192, "eval_rougeLsum": 99.3563, "eval_runtime": 33.0331, "eval_samples_per_second": 33.905, "eval_steps_per_second": 1.06, "step": 2100 } ], "logging_steps": 500, "max_steps": 2100, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.3099168784384e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }