Yoga26 committed on
Commit
ca2806c
·
verified ·
1 Parent(s): afcb5ee

Training selesai! Mengunggah model QG Hybrid.

Browse files
README.md CHANGED
@@ -19,15 +19,15 @@ model-index:
19
  - name: t5_sliding_window
20
  results:
21
  - task:
22
- type: text2text-generation
23
  name: Sequence-to-sequence Language Modeling
 
24
  dataset:
25
  name: nl-quad
26
  type: nl-quad
27
  metrics:
28
- - type: bleu
29
- value: 11.77
30
- name: Bleu
31
  ---
32
 
33
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -37,15 +37,15 @@ should probably proofread and complete it, then remove this comment. -->
37
 
38
  This model is a fine-tuned version of [t5-base](https://huggingface.co/t5-base) on the nl-quad dataset.
39
  It achieves the following results on the evaluation set:
40
- - Loss: 3.0131
41
- - Rougel: 34.47
42
- - Bleu: 11.77
43
- - Meteor: 33.46
44
- - Bert Precision: 93.08
45
- - Bert Recall: 92.29
46
- - Bert F1: 92.67
47
- - Qsts Mean: 53.31
48
- - Gen Len: 16.45
49
 
50
  ## Model description
51
 
@@ -78,16 +78,12 @@ The following hyperparameters were used during training:
78
 
79
  | Training Loss | Epoch | Step | Validation Loss | Rougel | Bleu | Meteor | Bert Precision | Bert Recall | Bert F1 | Qsts Mean | Gen Len |
80
  |:-------------:|:-----:|:----:|:---------------:|:------:|:-----:|:------:|:--------------:|:-----------:|:-------:|:---------:|:-------:|
81
- | 5.4740 | 1.0 | 220 | 4.8431 | 20.4 | 1.79 | 18.27 | 86.68 | 89.0 | 87.74 | 24.57 | 19.98 |
82
- | 4.4216 | 2.0 | 440 | 3.9717 | 21.39 | 3.28 | 20.72 | 90.42 | 89.81 | 90.07 | 34.27 | 19.98 |
83
- | 3.8590 | 3.0 | 660 | 3.5478 | 25.57 | 5.08 | 24.42 | 90.75 | 90.5 | 90.58 | 39.01 | 19.97 |
84
- | 3.5622 | 4.0 | 880 | 3.3400 | 29.82 | 7.59 | 28.66 | 92.21 | 91.42 | 91.79 | 45.11 | 17.66 |
85
- | 3.3504 | 5.0 | 1100 | 3.2076 | 31.62 | 8.88 | 30.09 | 92.66 | 91.74 | 92.18 | 48.18 | 18.44 |
86
- | 3.1962 | 6.0 | 1320 | 3.1238 | 32.36 | 9.6 | 31.05 | 92.79 | 91.98 | 92.36 | 50.46 | 16.98 |
87
- | 3.1304 | 7.0 | 1540 | 3.0616 | 34.02 | 11.19 | 32.6 | 93.05 | 92.09 | 92.55 | 52.04 | 15.84 |
88
- | 3.0061 | 8.0 | 1760 | 3.0315 | 34.06 | 11.38 | 32.99 | 93.04 | 92.2 | 92.6 | 52.32 | 16.73 |
89
- | 2.9567 | 9.0 | 1980 | 3.0200 | 34.56 | 12.02 | 33.48 | 93.12 | 92.29 | 92.69 | 53.1 | 16.27 |
90
- | 2.9162 | 10.0 | 2200 | 3.0131 | 34.47 | 11.77 | 33.46 | 93.08 | 92.29 | 92.67 | 53.31 | 16.45 |
91
 
92
 
93
  ### Framework versions
 
19
  - name: t5_sliding_window
20
  results:
21
  - task:
 
22
  name: Sequence-to-sequence Language Modeling
23
+ type: text2text-generation
24
  dataset:
25
  name: nl-quad
26
  type: nl-quad
27
  metrics:
28
+ - name: Bleu
29
+ type: bleu
30
+ value: 18.36
31
  ---
32
 
33
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
37
 
38
  This model is a fine-tuned version of [t5-base](https://huggingface.co/t5-base) on the nl-quad dataset.
39
  It achieves the following results on the evaluation set:
40
+ - Loss: 2.0398
41
+ - Rougel: 41.85
42
+ - Bleu: 18.36
43
+ - Meteor: 41.06
44
+ - Bert Precision: 94.18
45
+ - Bert Recall: 93.57
46
+ - Bert F1: 93.86
47
+ - Qsts Mean: 64.0300
48
+ - Gen Len: 13.99
49
 
50
  ## Model description
51
 
 
78
 
79
  | Training Loss | Epoch | Step | Validation Loss | Rougel | Bleu | Meteor | Bert Precision | Bert Recall | Bert F1 | Qsts Mean | Gen Len |
80
  |:-------------:|:-----:|:----:|:---------------:|:------:|:-----:|:------:|:--------------:|:-----------:|:-------:|:---------:|:-------:|
81
+ | 2.3550 | 1.0 | 249 | 2.1560 | 38.15 | 14.35 | 36.85 | 93.81 | 93.01 | 93.39 | 60.2300 | 12.88 |
82
+ | 2.1237 | 2.0 | 498 | 2.0569 | 39.42 | 15.61 | 38.32 | 93.95 | 93.22 | 93.57 | 61.7100 | 13.29 |
83
+ | 1.9450 | 3.0 | 747 | 2.0384 | 40.29 | 16.47 | 39.36 | 93.95 | 93.34 | 93.63 | 61.9700 | 13.73 |
84
+ | 1.8462 | 4.0 | 996 | 2.0324 | 40.33 | 16.88 | 39.44 | 93.94 | 93.39 | 93.66 | 62.3800 | 14.45 |
85
+ | 1.7684 | 5.0 | 1245 | 2.0337 | 41.01 | 17.87 | 40.13 | 94.03 | 93.5 | 93.75 | 63.8500 | 13.95 |
86
+ | 1.6971 | 6.0 | 1494 | 2.0398 | 41.85 | 18.36 | 41.06 | 94.18 | 93.57 | 93.86 | 64.0300 | 13.99 |
 
 
 
 
87
 
88
 
89
  ### Framework versions
config.json CHANGED
@@ -3,19 +3,19 @@
3
  "T5ForConditionalGeneration"
4
  ],
5
  "classifier_dropout": 0.0,
6
- "d_ff": 2048,
7
  "d_kv": 64,
8
  "d_model": 768,
9
  "decoder_start_token_id": 0,
10
- "dense_act_fn": "gelu_new",
11
  "dropout_rate": 0.1,
12
  "dtype": "float32",
13
  "eos_token_id": 1,
14
- "feed_forward_proj": "gated-gelu",
15
  "initializer_factor": 1.0,
16
  "is_decoder": false,
17
  "is_encoder_decoder": true,
18
- "is_gated_act": true,
19
  "layer_norm_epsilon": 1e-06,
20
  "model_type": "t5",
21
  "n_positions": 512,
 
3
  "T5ForConditionalGeneration"
4
  ],
5
  "classifier_dropout": 0.0,
6
+ "d_ff": 3072,
7
  "d_kv": 64,
8
  "d_model": 768,
9
  "decoder_start_token_id": 0,
10
+ "dense_act_fn": "relu",
11
  "dropout_rate": 0.1,
12
  "dtype": "float32",
13
  "eos_token_id": 1,
14
+ "feed_forward_proj": "relu",
15
  "initializer_factor": 1.0,
16
  "is_decoder": false,
17
  "is_encoder_decoder": true,
18
+ "is_gated_act": false,
19
  "layer_norm_epsilon": 1e-06,
20
  "model_type": "t5",
21
  "n_positions": 512,
generation_config.json CHANGED
@@ -5,25 +5,26 @@
5
  "decoder_start_token_id": 0,
6
  "diversity_penalty": 0.0,
7
  "do_sample": false,
8
- "early_stopping": false,
9
  "encoder_no_repeat_ngram_size": 0,
10
  "encoder_repetition_penalty": 1.0,
11
  "eos_token_id": 1,
12
  "epsilon_cutoff": 0.0,
13
  "eta_cutoff": 0.0,
14
- "length_penalty": 1.0,
15
  "max_length": 20,
 
16
  "min_length": 0,
17
- "no_repeat_ngram_size": 0,
18
  "num_assistant_tokens": 20,
19
  "num_assistant_tokens_schedule": "constant",
20
  "num_beam_groups": 1,
21
- "num_beams": 1,
22
  "num_return_sequences": 1,
23
  "output_scores": false,
24
  "pad_token_id": 0,
25
  "remove_invalid_values": false,
26
- "repetition_penalty": 1.0,
27
  "return_dict_in_generate": false,
28
  "target_lookbehind": 10,
29
  "temperature": 1.0,
 
5
  "decoder_start_token_id": 0,
6
  "diversity_penalty": 0.0,
7
  "do_sample": false,
8
+ "early_stopping": true,
9
  "encoder_no_repeat_ngram_size": 0,
10
  "encoder_repetition_penalty": 1.0,
11
  "eos_token_id": 1,
12
  "epsilon_cutoff": 0.0,
13
  "eta_cutoff": 0.0,
14
+ "length_penalty": 1,
15
  "max_length": 20,
16
+ "max_new_tokens": 64,
17
  "min_length": 0,
18
+ "no_repeat_ngram_size": 1,
19
  "num_assistant_tokens": 20,
20
  "num_assistant_tokens_schedule": "constant",
21
  "num_beam_groups": 1,
22
+ "num_beams": 4,
23
  "num_return_sequences": 1,
24
  "output_scores": false,
25
  "pad_token_id": 0,
26
  "remove_invalid_values": false,
27
+ "repetition_penalty": 1,
28
  "return_dict_in_generate": false,
29
  "target_lookbehind": 10,
30
  "temperature": 1.0,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68797fac71fa6c02815f9ccdc062d283c8a1a9134f7919f285c22b4cd68a3f92
3
- size 891561744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3204b176fab0876001da80a4c75ec97f76aa95894a94331d44e381e39188ab7
3
+ size 891558696
tokenizer_config.json CHANGED
@@ -105,9 +105,8 @@
105
  "<extra_id_99>"
106
  ],
107
  "is_local": false,
108
- "model_max_length": 512,
109
  "pad_token": "<pad>",
110
- "sp_model_kwargs": {},
111
  "tokenizer_class": "T5Tokenizer",
112
  "unk_token": "<unk>"
113
  }
 
105
  "<extra_id_99>"
106
  ],
107
  "is_local": false,
108
+ "model_max_length": 1000000000000000019884624838656,
109
  "pad_token": "<pad>",
 
110
  "tokenizer_class": "T5Tokenizer",
111
  "unk_token": "<unk>"
112
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2e04f4a60800f2208de92988d81110a3cd17c704d825a8ec707a1f2f1dd1375
3
  size 5393
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d93ac8f4afd4e8b5e8f612e558216041aa5d9ee6ffbfa2e3d606c185a8cc015d
3
  size 5393