Training selesai! Mengunggah model QG Hybrid.

Browse files

Files changed (6) hide show

README.md +19 -23
config.json +4 -4
generation_config.json +6 -5
model.safetensors +2 -2
tokenizer_config.json +1 -2
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -19,15 +19,15 @@ model-index:
 - name: t5_sliding_window
   results:
   - task:
-      type: text2text-generation
       name: Sequence-to-sequence Language Modeling
     dataset:
       name: nl-quad
       type: nl-quad
     metrics:
-    - type: bleu
-      value: 11.77
-      name: Bleu
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -37,15 +37,15 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [t5-base](https://huggingface.co/t5-base) on the nl-quad dataset.
 It achieves the following results on the evaluation set:
-- Loss: 3.0131
-- Rougel: 34.47
-- Bleu: 11.77
-- Meteor: 33.46
-- Bert Precision: 93.08
-- Bert Recall: 92.29
-- Bert F1: 92.67
-- Qsts Mean: 53.31
-- Gen Len: 16.45
 ## Model description
@@ -78,16 +78,12 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss | Rougel | Bleu  | Meteor | Bert Precision | Bert Recall | Bert F1 | Qsts Mean | Gen Len |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:-----:|:------:|:--------------:|:-----------:|:-------:|:---------:|:-------:|
-| 5.4740        | 1.0   | 220  | 4.8431          | 20.4   | 1.79  | 18.27  | 86.68          | 89.0        | 87.74   | 24.57     | 19.98   |
-| 4.4216        | 2.0   | 440  | 3.9717          | 21.39  | 3.28  | 20.72  | 90.42          | 89.81       | 90.07   | 34.27     | 19.98   |
-| 3.8590        | 3.0   | 660  | 3.5478          | 25.57  | 5.08  | 24.42  | 90.75          | 90.5        | 90.58   | 39.01     | 19.97   |
-| 3.5622        | 4.0   | 880  | 3.3400          | 29.82  | 7.59  | 28.66  | 92.21          | 91.42       | 91.79   | 45.11     | 17.66   |
-| 3.3504        | 5.0   | 1100 | 3.2076          | 31.62  | 8.88  | 30.09  | 92.66          | 91.74       | 92.18   | 48.18     | 18.44   |
-| 3.1962        | 6.0   | 1320 | 3.1238          | 32.36  | 9.6   | 31.05  | 92.79          | 91.98       | 92.36   | 50.46     | 16.98   |
-| 3.1304        | 7.0   | 1540 | 3.0616          | 34.02  | 11.19 | 32.6   | 93.05          | 92.09       | 92.55   | 52.04     | 15.84   |
-| 3.0061        | 8.0   | 1760 | 3.0315          | 34.06  | 11.38 | 32.99  | 93.04          | 92.2        | 92.6    | 52.32     | 16.73   |
-| 2.9567        | 9.0   | 1980 | 3.0200          | 34.56  | 12.02 | 33.48  | 93.12          | 92.29       | 92.69   | 53.1      | 16.27   |
-| 2.9162        | 10.0  | 2200 | 3.0131          | 34.47  | 11.77 | 33.46  | 93.08          | 92.29       | 92.67   | 53.31     | 16.45   |
 ### Framework versions

 - name: t5_sliding_window
   results:
   - task:
       name: Sequence-to-sequence Language Modeling
+      type: text2text-generation
     dataset:
       name: nl-quad
       type: nl-quad
     metrics:
+    - name: Bleu
+      type: bleu
+      value: 18.36
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [t5-base](https://huggingface.co/t5-base) on the nl-quad dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.0398
+- Rougel: 41.85
+- Bleu: 18.36
+- Meteor: 41.06
+- Bert Precision: 94.18
+- Bert Recall: 93.57
+- Bert F1: 93.86
+- Qsts Mean: 64.0300
+- Gen Len: 13.99
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss | Rougel | Bleu  | Meteor | Bert Precision | Bert Recall | Bert F1 | Qsts Mean | Gen Len |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:-----:|:------:|:--------------:|:-----------:|:-------:|:---------:|:-------:|
+| 2.3550        | 1.0   | 249  | 2.1560          | 38.15  | 14.35 | 36.85  | 93.81          | 93.01       | 93.39   | 60.2300   | 12.88   |
+| 2.1237        | 2.0   | 498  | 2.0569          | 39.42  | 15.61 | 38.32  | 93.95          | 93.22       | 93.57   | 61.7100   | 13.29   |
+| 1.9450        | 3.0   | 747  | 2.0384          | 40.29  | 16.47 | 39.36  | 93.95          | 93.34       | 93.63   | 61.9700   | 13.73   |
+| 1.8462        | 4.0   | 996  | 2.0324          | 40.33  | 16.88 | 39.44  | 93.94          | 93.39       | 93.66   | 62.3800   | 14.45   |
+| 1.7684        | 5.0   | 1245 | 2.0337          | 41.01  | 17.87 | 40.13  | 94.03          | 93.5        | 93.75   | 63.8500   | 13.95   |
+| 1.6971        | 6.0   | 1494 | 2.0398          | 41.85  | 18.36 | 41.06  | 94.18          | 93.57       | 93.86   | 64.0300   | 13.99   |
 ### Framework versions

config.json CHANGED Viewed

@@ -3,19 +3,19 @@
     "T5ForConditionalGeneration"
   ],
   "classifier_dropout": 0.0,
-  "d_ff": 2048,
   "d_kv": 64,
   "d_model": 768,
   "decoder_start_token_id": 0,
-  "dense_act_fn": "gelu_new",
   "dropout_rate": 0.1,
   "dtype": "float32",
   "eos_token_id": 1,
-  "feed_forward_proj": "gated-gelu",
   "initializer_factor": 1.0,
   "is_decoder": false,
   "is_encoder_decoder": true,
-  "is_gated_act": true,
   "layer_norm_epsilon": 1e-06,
   "model_type": "t5",
   "n_positions": 512,

     "T5ForConditionalGeneration"
   ],
   "classifier_dropout": 0.0,
+  "d_ff": 3072,
   "d_kv": 64,
   "d_model": 768,
   "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
   "dropout_rate": 0.1,
   "dtype": "float32",
   "eos_token_id": 1,
+  "feed_forward_proj": "relu",
   "initializer_factor": 1.0,
   "is_decoder": false,
   "is_encoder_decoder": true,
+  "is_gated_act": false,
   "layer_norm_epsilon": 1e-06,
   "model_type": "t5",
   "n_positions": 512,

generation_config.json CHANGED Viewed

@@ -5,25 +5,26 @@
   "decoder_start_token_id": 0,
   "diversity_penalty": 0.0,
   "do_sample": false,
-  "early_stopping": false,
   "encoder_no_repeat_ngram_size": 0,
   "encoder_repetition_penalty": 1.0,
   "eos_token_id": 1,
   "epsilon_cutoff": 0.0,
   "eta_cutoff": 0.0,
-  "length_penalty": 1.0,
   "max_length": 20,
   "min_length": 0,
-  "no_repeat_ngram_size": 0,
   "num_assistant_tokens": 20,
   "num_assistant_tokens_schedule": "constant",
   "num_beam_groups": 1,
-  "num_beams": 1,
   "num_return_sequences": 1,
   "output_scores": false,
   "pad_token_id": 0,
   "remove_invalid_values": false,
-  "repetition_penalty": 1.0,
   "return_dict_in_generate": false,
   "target_lookbehind": 10,
   "temperature": 1.0,

   "decoder_start_token_id": 0,
   "diversity_penalty": 0.0,
   "do_sample": false,
+  "early_stopping": true,
   "encoder_no_repeat_ngram_size": 0,
   "encoder_repetition_penalty": 1.0,
   "eos_token_id": 1,
   "epsilon_cutoff": 0.0,
   "eta_cutoff": 0.0,
+  "length_penalty": 1,
   "max_length": 20,
+  "max_new_tokens": 64,
   "min_length": 0,
+  "no_repeat_ngram_size": 1,
   "num_assistant_tokens": 20,
   "num_assistant_tokens_schedule": "constant",
   "num_beam_groups": 1,
+  "num_beams": 4,
   "num_return_sequences": 1,
   "output_scores": false,
   "pad_token_id": 0,
   "remove_invalid_values": false,
+  "repetition_penalty": 1,
   "return_dict_in_generate": false,
   "target_lookbehind": 10,
   "temperature": 1.0,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68797fac71fa6c02815f9ccdc062d283c8a1a9134f7919f285c22b4cd68a3f92
-size 891561744

 version https://git-lfs.github.com/spec/v1
+oid sha256:c3204b176fab0876001da80a4c75ec97f76aa95894a94331d44e381e39188ab7
+size 891558696

tokenizer_config.json CHANGED Viewed

@@ -105,9 +105,8 @@
     "<extra_id_99>"
   ],
   "is_local": false,
-  "model_max_length": 512,
   "pad_token": "<pad>",
-  "sp_model_kwargs": {},
   "tokenizer_class": "T5Tokenizer",
   "unk_token": "<unk>"
 }

     "<extra_id_99>"
   ],
   "is_local": false,
+  "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
   "tokenizer_class": "T5Tokenizer",
   "unk_token": "<unk>"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2e04f4a60800f2208de92988d81110a3cd17c704d825a8ec707a1f2f1dd1375
 size 5393

 version https://git-lfs.github.com/spec/v1
+oid sha256:d93ac8f4afd4e8b5e8f612e558216041aa5d9ee6ffbfa2e3d606c185a8cc015d
 size 5393