End of training

Browse files

Files changed (7) hide show

README.md +55 -34
config.json +2 -2
generation_config.json +1 -1
pytorch_model.bin +1 -1
tokenizer.json +1 -6
tokenizer_config.json +4 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,28 +1,13 @@
 ---
 license: apache-2.0
-base_model: t5-small
 tags:
 - generated_from_trainer
-datasets:
-- billsum
 metrics:
 - rouge
 model-index:
 - name: test_trainer
-  results:
-  - task:
-      name: Sequence-to-sequence Language Modeling
-      type: text2text-generation
-    dataset:
-      name: billsum
-      type: billsum
-      config: default
-      split: ca_test
-      args: default
-    metrics:
-    - name: Rouge1
-      type: rouge
-      value: 0.1416
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -30,14 +15,14 @@ should probably proofread and complete it, then remove this comment. -->
 # test_trainer
-This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on the billsum dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.5392
-- Rouge1: 0.1416
-- Rouge2: 0.0487
-- Rougel: 0.1147
-- Rougelsum: 0.1146
-- Gen Len: 19.0
 ## Model description
@@ -56,27 +41,63 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 2e-05
-- train_batch_size: 16
-- eval_batch_size: 16
 - seed: 42
-- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 4
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|:-------:|
-| No log        | 1.0   | 62   | 2.8306          | 0.1276 | 0.0376 | 0.1061 | 0.1064    | 19.0    |
-| No log        | 2.0   | 124  | 2.6198          | 0.1367 | 0.0468 | 0.1118 | 0.1117    | 19.0    |
-| No log        | 3.0   | 186  | 2.5563          | 0.1409 | 0.0488 | 0.1144 | 0.1145    | 19.0    |
-| No log        | 4.0   | 248  | 2.5392          | 0.1416 | 0.0487 | 0.1147 | 0.1146    | 19.0    |
 ### Framework versions
-- Transformers 4.33.1
 - Pytorch 2.0.0
 - Datasets 2.14.5
 - Tokenizers 0.13.3

 ---
 license: apache-2.0
+base_model: tvganesh/test_trainer
 tags:
 - generated_from_trainer
 metrics:
 - rouge
 model-index:
 - name: test_trainer
+  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You should probably proofread and complete it, then remove this comment. -->
 # test_trainer
+This model is a fine-tuned version of [tvganesh/test_trainer](https://huggingface.co/tvganesh/test_trainer) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.0000
+- Rouge1: 0.8325
+- Rouge2: 0.8187
+- Rougel: 0.8294
+- Rougelsum: 0.8312
+- Gen Len: 18.6
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 0.0056
+- train_batch_size: 8
+- eval_batch_size: 8
 - seed: 42
+- optimizer: Adam with betas=(0.9,0.99) and epsilon=1e-06
 - lr_scheduler_type: linear
+- num_epochs: 40
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|:-------:|
+| No log        | 1.0   | 5    | 0.2345          | 0.7001 | 0.6536 | 0.6998 | 0.6957    | 16.3    |
+| No log        | 2.0   | 10   | 0.1472          | 0.7958 | 0.7695 | 0.7929 | 0.7965    | 18.3    |
+| No log        | 3.0   | 15   | 0.1174          | 0.7196 | 0.6705 | 0.7187 | 0.7118    | 16.3    |
+| No log        | 4.0   | 20   | 0.0554          | 0.7977 | 0.774  | 0.7907 | 0.7958    | 18.6    |
+| No log        | 5.0   | 25   | 0.0725          | 0.8205 | 0.8074 | 0.8188 | 0.8212    | 18.6    |
+| No log        | 6.0   | 30   | 0.0281          | 0.8114 | 0.7929 | 0.8098 | 0.8123    | 18.6    |
+| No log        | 7.0   | 35   | 0.0451          | 0.7959 | 0.7678 | 0.7908 | 0.7945    | 18.6    |
+| No log        | 8.0   | 40   | 0.0438          | 0.8285 | 0.8061 | 0.8205 | 0.8227    | 18.5    |
+| No log        | 9.0   | 45   | 0.0178          | 0.8249 | 0.8109 | 0.8225 | 0.8243    | 18.6    |
+| No log        | 10.0  | 50   | 0.0072          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 11.0  | 55   | 0.0119          | 0.8336 | 0.8217 | 0.8315 | 0.833     | 18.6    |
+| No log        | 12.0  | 60   | 0.0104          | 0.8336 | 0.8217 | 0.8315 | 0.833     | 18.6    |
+| No log        | 13.0  | 65   | 0.0031          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 14.0  | 70   | 0.0099          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 15.0  | 75   | 0.0067          | 0.8284 | 0.8053 | 0.8213 | 0.8226    | 18.6    |
+| No log        | 16.0  | 80   | 0.0019          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 17.0  | 85   | 0.0173          | 0.8143 | 0.798  | 0.8111 | 0.8102    | 18.2    |
+| No log        | 18.0  | 90   | 0.0007          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 19.0  | 95   | 0.0004          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 20.0  | 100  | 0.0195          | 0.8325 | 0.813  | 0.8294 | 0.8312    | 18.6    |
+| No log        | 21.0  | 105  | 0.0057          | 0.8325 | 0.813  | 0.8294 | 0.8312    | 18.6    |
+| No log        | 22.0  | 110  | 0.0005          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 23.0  | 115  | 0.0010          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 24.0  | 120  | 0.0003          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 25.0  | 125  | 0.0004          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 26.0  | 130  | 0.0005          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 27.0  | 135  | 0.0002          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 28.0  | 140  | 0.0001          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 29.0  | 145  | 0.0010          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 30.0  | 150  | 0.0003          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 31.0  | 155  | 0.0001          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 32.0  | 160  | 0.0000          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 33.0  | 165  | 0.0000          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 34.0  | 170  | 0.0000          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 35.0  | 175  | 0.0000          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 36.0  | 180  | 0.0000          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 37.0  | 185  | 0.0000          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 38.0  | 190  | 0.0000          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 39.0  | 195  | 0.0000          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
+| No log        | 40.0  | 200  | 0.0000          | 0.8325 | 0.8187 | 0.8294 | 0.8312    | 18.6    |
 ### Framework versions
+- Transformers 4.33.2
 - Pytorch 2.0.0
 - Datasets 2.14.5
 - Tokenizers 0.13.3

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "t5-small",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
@@ -55,7 +55,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.33.1",
   "use_cache": true,
   "vocab_size": 32128
 }

 {
+  "_name_or_path": "tvganesh/test_trainer",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
     }
   },
   "torch_dtype": "float32",
+  "transformers_version": "4.33.2",
   "use_cache": true,
   "vocab_size": 32128
 }

generation_config.json CHANGED Viewed

@@ -2,5 +2,5 @@
   "decoder_start_token_id": 0,
   "eos_token_id": 1,
   "pad_token_id": 0,
-  "transformers_version": "4.33.1"
 }

   "decoder_start_token_id": 0,
   "eos_token_id": 1,
   "pad_token_id": 0,
+  "transformers_version": "4.33.2"
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81a3bf086c07014a39c291bdc44fc7bdc85496d698124bee21894425cf8e6a6e
 size 242071641

 version https://git-lfs.github.com/spec/v1
+oid sha256:87abea46299a876ed191510462d6237a9cf922a08ed3446e583b32d99888f469
 size 242071641

tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 128,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

tokenizer_config.json CHANGED Viewed

@@ -104,8 +104,12 @@
   "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
   "extra_ids": 100,
   "model_max_length": 512,
   "pad_token": "<pad>",
   "tokenizer_class": "T5Tokenizer",
   "unk_token": "<unk>"
 }

   "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
   "extra_ids": 100,
+  "max_length": 128,
   "model_max_length": 512,
   "pad_token": "<pad>",
+  "stride": 0,
   "tokenizer_class": "T5Tokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "<unk>"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5df6a84611b8fba4721e3be98f92dac3f7de7474fcd37ef1f18b7ad01fb4bb85
 size 4155

 version https://git-lfs.github.com/spec/v1
+oid sha256:35ef7e2a5a50fafd356245a180b34be86bae6f57eed5a0fdc3dc5607adf0e1f5
 size 4155