End of training

Browse files

Files changed (9)

README.md +8 -17
config.json +72 -3
generation_config.json +1 -2
model.safetensors +1 -1
runs/Jun04_10-32-09_70c6696ca1f2/events.out.tfevents.1717497134.70c6696ca1f2.34.0 +3 -0
runs/Jun04_10-34-18_70c6696ca1f2/events.out.tfevents.1717497259.70c6696ca1f2.34.1 +3 -0
runs/Jun04_10-34-18_70c6696ca1f2/events.out.tfevents.1717497732.70c6696ca1f2.34.2 +3 -0
tokenizer.json +0 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-base_model: google/pegasus-cnn_dailymail
 tags:
 - generated_from_trainer
 metrics:
@@ -14,14 +14,14 @@ should probably proofread and complete it, then remove this comment. -->
 # pegasus-samsum
-This model is a fine-tuned version of [google/pegasus-cnn_dailymail](https://huggingface.co/google/pegasus-cnn_dailymail) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 5.3030
-- Rouge1: 43.8476
-- Rouge2: 15.4851
-- Rougel: 32.5764
-- Rougelsum: 39.3579
-- Gen Len: 127.2582
 ## Model description
@@ -53,15 +53,6 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch  | Step | Validation Loss | Rouge1  | Rouge2  | Rougel  | Rougelsum | Gen Len  |
-|:-------------:|:------:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|:--------:|
-| 6.0132        | 0.1314 | 100  | 5.7494          | 39.0961 | 12.9642 | 29.8168 | 35.3077   | 127.2582 |
-| 5.8274        | 0.2628 | 200  | 5.5325          | 41.7845 | 14.2872 | 31.299  | 37.5468   | 127.2582 |
-| 5.8027        | 0.3942 | 300  | 5.4423          | 42.4827 | 14.6782 | 31.6679 | 38.2157   | 127.2582 |
-| 5.5682        | 0.5256 | 400  | 5.3866          | 43.3037 | 15.0392 | 32.0844 | 38.7795   | 127.2582 |
-| 5.396         | 0.6570 | 500  | 5.3446          | 43.5705 | 15.2971 | 32.2981 | 38.998    | 127.2582 |
-| 5.5759        | 0.7884 | 600  | 5.3106          | 43.8978 | 15.4979 | 32.6086 | 39.4542   | 127.2582 |
-| 5.4803        | 0.9198 | 700  | 5.3030          | 43.8476 | 15.4851 | 32.5764 | 39.3579   | 127.2582 |
 ### Framework versions

 ---
+base_model: google/pegasus-large
 tags:
 - generated_from_trainer
 metrics:
 # pegasus-samsum
+This model is a fine-tuned version of [google/pegasus-large](https://huggingface.co/google/pegasus-large) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 6.2452
+- Rouge1: 34.6462
+- Rouge2: 8.2009
+- Rougel: 24.8919
+- Rougelsum: 31.6451
+- Gen Len: 125.97
 ## Model description
 ### Training results
 ### Framework versions

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "google/pegasus-cnn_dailymail",
   "activation_dropout": 0.1,
   "activation_function": "relu",
   "add_bias_logits": false,
@@ -10,6 +10,7 @@
   "attention_dropout": 0.1,
   "bos_token_id": 0,
   "classif_dropout": 0.0,
   "d_model": 1024,
   "decoder_attention_heads": 16,
   "decoder_ffn_dim": 4096,
@@ -23,7 +24,9 @@
   "encoder_layers": 16,
   "eos_token_id": 1,
   "extra_pos_embeddings": 1,
   "forced_eos_token_id": 1,
   "id2label": {
     "0": "LABEL_0",
     "1": "LABEL_1",
@@ -37,9 +40,8 @@
     "LABEL_2": 2
   },
   "length_penalty": 0.8,
-  "max_length": 128,
   "max_position_embeddings": 1024,
-  "min_length": 32,
   "model_type": "pegasus",
   "normalize_before": true,
   "normalize_embedding": false,
@@ -48,6 +50,73 @@
   "pad_token_id": 0,
   "scale_embedding": true,
   "static_position_embeddings": true,
   "torch_dtype": "float32",
   "transformers_version": "4.41.1",
   "use_cache": true,

 {
+  "_name_or_path": "google/pegasus-large",
   "activation_dropout": 0.1,
   "activation_function": "relu",
   "add_bias_logits": false,
   "attention_dropout": 0.1,
   "bos_token_id": 0,
   "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
   "d_model": 1024,
   "decoder_attention_heads": 16,
   "decoder_ffn_dim": 4096,
   "encoder_layers": 16,
   "eos_token_id": 1,
   "extra_pos_embeddings": 1,
+  "force_bos_token_to_be_generated": false,
   "forced_eos_token_id": 1,
+  "gradient_checkpointing": false,
   "id2label": {
     "0": "LABEL_0",
     "1": "LABEL_1",
     "LABEL_2": 2
   },
   "length_penalty": 0.8,
+  "max_length": 256,
   "max_position_embeddings": 1024,
   "model_type": "pegasus",
   "normalize_before": true,
   "normalize_embedding": false,
   "pad_token_id": 0,
   "scale_embedding": true,
   "static_position_embeddings": true,
+  "task_specific_params": {
+    "summarization_aeslc": {
+      "length_penalty": 0.6,
+      "max_length": 32,
+      "max_position_embeddings": 512
+    },
+    "summarization_arxiv": {
+      "length_penalty": 0.8,
+      "max_length": 256,
+      "max_position_embeddings": 1024
+    },
+    "summarization_big_patent": {
+      "length_penalty": 0.7,
+      "max_length": 256,
+      "max_position_embeddings": 1024
+    },
+    "summarization_billsum": {
+      "length_penalty": 0.6,
+      "max_length": 256,
+      "max_position_embeddings": 1024
+    },
+    "summarization_cnn_dailymail": {
+      "length_penalty": 0.8,
+      "max_length": 128,
+      "max_position_embeddings": 1024
+    },
+    "summarization_gigaword": {
+      "length_penalty": 0.6,
+      "max_length": 32,
+      "max_position_embeddings": 128
+    },
+    "summarization_large": {
+      "length_penalty": 0.8,
+      "max_length": 256,
+      "max_position_embeddings": 1024
+    },
+    "summarization_multi_news": {
+      "length_penalty": 0.8,
+      "max_length": 256,
+      "max_position_embeddings": 1024
+    },
+    "summarization_newsroom": {
+      "length_penalty": 0.8,
+      "max_length": 128,
+      "max_position_embeddings": 512
+    },
+    "summarization_pubmed": {
+      "length_penalty": 0.8,
+      "max_length": 256,
+      "max_position_embeddings": 1024
+    },
+    "summarization_reddit_tifu": {
+      "length_penalty": 0.6,
+      "max_length": 128,
+      "max_position_embeddings": 512
+    },
+    "summarization_wikihow": {
+      "length_penalty": 0.6,
+      "max_length": 256,
+      "max_position_embeddings": 512
+    },
+    "summarization_xsum": {
+      "length_penalty": 0.8,
+      "max_length": 64,
+      "max_position_embeddings": 512
+    }
+  },
   "torch_dtype": "float32",
   "transformers_version": "4.41.1",
   "use_cache": true,

generation_config.json CHANGED Viewed

@@ -5,8 +5,7 @@
   "eos_token_id": 1,
   "forced_eos_token_id": 1,
   "length_penalty": 0.8,
-  "max_length": 128,
-  "min_length": 32,
   "num_beams": 8,
   "pad_token_id": 0,
   "transformers_version": "4.41.1"

   "eos_token_id": 1,
   "forced_eos_token_id": 1,
   "length_penalty": 0.8,
+  "max_length": 256,
   "num_beams": 8,
   "pad_token_id": 0,
   "transformers_version": "4.41.1"

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1434003cb15dbc37232531a5b96b5715ae381d2c69620b272e52b28534b9cee
 size 2283652852

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2e4ee73058cdfaa92d5f197117bda37fdea440f5330078c6a3b109ffd35a7ca
 size 2283652852

runs/Jun04_10-32-09_70c6696ca1f2/events.out.tfevents.1717497134.70c6696ca1f2.34.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f0b6b49f3700339f47afd0f62cfd831a851cbeae427d42c7cbacce7ad5e1642
+size 7262

runs/Jun04_10-34-18_70c6696ca1f2/events.out.tfevents.1717497259.70c6696ca1f2.34.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5b95ac13ed74dc4866fd965b542ab8cdb26e99a6bcf553a6014078aa0d4fe64c
+size 8852

runs/Jun04_10-34-18_70c6696ca1f2/events.out.tfevents.1717497732.70c6696ca1f2.34.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e56362f168b75d6f09d2503162279f92f3aa4abef3e750fae8f305a175dc20ea
+size 603

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e30ea9ac8f1fbbea5fce878de9372cdddf3ae7c7b1f8574042a370aa27a85791
 size 5112

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8187be43213b7271b7d487c3ee4f5bc5e9f7a9c46387aecb21ef954e6ea5fe1
 size 5112