{ "best_metric": 0.016448974609375, "best_model_checkpoint": "model_fewrel_1_3-task4/checkpoint-1890", "epoch": 10.0, "eval_steps": 500, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 0.0394287109375, "eval_rouge1": 98.8133, "eval_rouge2": 98.2303, "eval_rougeL": 98.6613, "eval_rougeLsum": 98.8046, "eval_runtime": 26.1194, "eval_samples_per_second": 42.88, "eval_steps_per_second": 1.34, "step": 210 }, { "epoch": 2.0, "eval_loss": 0.032379150390625, "eval_rouge1": 98.0957, "eval_rouge2": 97.1666, "eval_rougeL": 97.8621, "eval_rougeLsum": 98.1056, "eval_runtime": 25.8031, "eval_samples_per_second": 43.406, "eval_steps_per_second": 1.356, "step": 420 }, { "epoch": 2.380952380952381, "grad_norm": 0.016574041917920113, "learning_rate": 0.0008665259359149131, "loss": 0.0725, "step": 500 }, { "epoch": 3.0, "eval_loss": 0.0189056396484375, "eval_rouge1": 99.423, "eval_rouge2": 99.1539, "eval_rougeL": 99.359, "eval_rougeLsum": 99.4285, "eval_runtime": 25.7666, "eval_samples_per_second": 43.467, "eval_steps_per_second": 1.358, "step": 630 }, { "epoch": 4.0, "eval_loss": 0.03497314453125, "eval_rouge1": 98.9701, "eval_rouge2": 98.497, "eval_rougeL": 98.8414, "eval_rougeLsum": 98.9712, "eval_runtime": 25.9235, "eval_samples_per_second": 43.204, "eval_steps_per_second": 1.35, "step": 840 }, { "epoch": 4.761904761904762, "grad_norm": 0.016128525137901306, "learning_rate": 0.0005373650467932121, "loss": 0.0226, "step": 1000 }, { "epoch": 5.0, "eval_loss": 0.0195159912109375, "eval_rouge1": 99.2315, "eval_rouge2": 98.8414, "eval_rougeL": 99.1293, "eval_rougeLsum": 99.2314, "eval_runtime": 26.0919, "eval_samples_per_second": 42.925, "eval_steps_per_second": 1.341, "step": 1050 }, { "epoch": 6.0, "eval_loss": 0.020782470703125, "eval_rouge1": 99.5165, "eval_rouge2": 99.2985, "eval_rougeL": 99.4726, "eval_rougeLsum": 99.5153, "eval_runtime": 25.9934, "eval_samples_per_second": 43.088, "eval_steps_per_second": 1.346, "step": 1260 }, { "epoch": 7.0, "eval_loss": 0.0180206298828125, "eval_rouge1": 99.5187, "eval_rouge2": 99.3048, "eval_rougeL": 99.4708, "eval_rougeLsum": 99.5346, "eval_runtime": 25.8971, "eval_samples_per_second": 43.248, "eval_steps_per_second": 1.352, "step": 1470 }, { "epoch": 7.142857142857143, "grad_norm": 0.08678867667913437, "learning_rate": 0.00018825509907063325, "loss": 0.0096, "step": 1500 }, { "epoch": 8.0, "eval_loss": 0.016693115234375, "eval_rouge1": 99.4715, "eval_rouge2": 99.2326, "eval_rougeL": 99.4096, "eval_rougeLsum": 99.484, "eval_runtime": 25.9209, "eval_samples_per_second": 43.208, "eval_steps_per_second": 1.35, "step": 1680 }, { "epoch": 9.0, "eval_loss": 0.016448974609375, "eval_rouge1": 99.5187, "eval_rouge2": 99.3048, "eval_rougeL": 99.4708, "eval_rougeLsum": 99.5346, "eval_runtime": 25.8641, "eval_samples_per_second": 43.303, "eval_steps_per_second": 1.353, "step": 1890 }, { "epoch": 9.523809523809524, "grad_norm": 0.015132551081478596, "learning_rate": 5.5845868874357386e-06, "loss": 0.0063, "step": 2000 }, { "epoch": 10.0, "eval_loss": 0.0164794921875, "eval_rouge1": 99.5187, "eval_rouge2": 99.3048, "eval_rougeL": 99.4708, "eval_rougeLsum": 99.5346, "eval_runtime": 25.9133, "eval_samples_per_second": 43.221, "eval_steps_per_second": 1.351, "step": 2100 } ], "logging_steps": 500, "max_steps": 2100, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.3099168784384e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }