Training selesai! Mengunggah model QG Hybrid.
Browse files
- README.md +19 -23
- config.json +4 -4
- generation_config.json +6 -5
- model.safetensors +2 -2
- tokenizer_config.json +1 -2
- training_args.bin +1 -1
README.md
CHANGED
|
@@ -19,15 +19,15 @@ model-index:
|
|
| 19 |
- name: t5_sliding_window
|
| 20 |
results:
|
| 21 |
- task:
|
| 22 |
-
type: text2text-generation
|
| 23 |
name: Sequence-to-sequence Language Modeling
|
|
|
|
| 24 |
dataset:
|
| 25 |
name: nl-quad
|
| 26 |
type: nl-quad
|
| 27 |
metrics:
|
| 28 |
-
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
---
|
| 32 |
|
| 33 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
|
@@ -37,15 +37,15 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 37 |
|
| 38 |
This model is a fine-tuned version of [t5-base](https://huggingface.co/t5-base) on the nl-quad dataset.
|
| 39 |
It achieves the following results on the evaluation set:
|
| 40 |
-
- Loss:
|
| 41 |
-
- Rougel:
|
| 42 |
-
- Bleu:
|
| 43 |
-
- Meteor:
|
| 44 |
-
- Bert Precision:
|
| 45 |
-
- Bert Recall:
|
| 46 |
-
- Bert F1:
|
| 47 |
-
- Qsts Mean:
|
| 48 |
-
- Gen Len:
|
| 49 |
|
| 50 |
## Model description
|
| 51 |
|
|
@@ -78,16 +78,12 @@ The following hyperparameters were used during training:
|
|
| 78 |
|
| 79 |
| Training Loss | Epoch | Step | Validation Loss | Rougel | Bleu | Meteor | Bert Precision | Bert Recall | Bert F1 | Qsts Mean | Gen Len |
|
| 80 |
|:-------------:|:-----:|:----:|:---------------:|:------:|:-----:|:------:|:--------------:|:-----------:|:-------:|:---------:|:-------:|
|
| 81 |
-
|
|
| 82 |
-
|
|
| 83 |
-
|
|
| 84 |
-
|
|
| 85 |
-
|
|
| 86 |
-
|
|
| 87 |
-
| 3.1304 | 7.0 | 1540 | 3.0616 | 34.02 | 11.19 | 32.6 | 93.05 | 92.09 | 92.55 | 52.04 | 15.84 |
|
| 88 |
-
| 3.0061 | 8.0 | 1760 | 3.0315 | 34.06 | 11.38 | 32.99 | 93.04 | 92.2 | 92.6 | 52.32 | 16.73 |
|
| 89 |
-
| 2.9567 | 9.0 | 1980 | 3.0200 | 34.56 | 12.02 | 33.48 | 93.12 | 92.29 | 92.69 | 53.1 | 16.27 |
|
| 90 |
-
| 2.9162 | 10.0 | 2200 | 3.0131 | 34.47 | 11.77 | 33.46 | 93.08 | 92.29 | 92.67 | 53.31 | 16.45 |
|
| 91 |
|
| 92 |
|
| 93 |
### Framework versions
|
|
|
|
| 19 |
- name: t5_sliding_window
|
| 20 |
results:
|
| 21 |
- task:
|
|
|
|
| 22 |
name: Sequence-to-sequence Language Modeling
|
| 23 |
+
type: text2text-generation
|
| 24 |
dataset:
|
| 25 |
name: nl-quad
|
| 26 |
type: nl-quad
|
| 27 |
metrics:
|
| 28 |
+
- name: Bleu
|
| 29 |
+
type: bleu
|
| 30 |
+
value: 18.36
|
| 31 |
---
|
| 32 |
|
| 33 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
|
|
|
| 37 |
|
| 38 |
This model is a fine-tuned version of [t5-base](https://huggingface.co/t5-base) on the nl-quad dataset.
|
| 39 |
It achieves the following results on the evaluation set:
|
| 40 |
+
- Loss: 2.0398
|
| 41 |
+
- Rougel: 41.85
|
| 42 |
+
- Bleu: 18.36
|
| 43 |
+
- Meteor: 41.06
|
| 44 |
+
- Bert Precision: 94.18
|
| 45 |
+
- Bert Recall: 93.57
|
| 46 |
+
- Bert F1: 93.86
|
| 47 |
+
- Qsts Mean: 64.0300
|
| 48 |
+
- Gen Len: 13.99
|
| 49 |
|
| 50 |
## Model description
|
| 51 |
|
|
|
|
| 78 |
|
| 79 |
| Training Loss | Epoch | Step | Validation Loss | Rougel | Bleu | Meteor | Bert Precision | Bert Recall | Bert F1 | Qsts Mean | Gen Len |
|
| 80 |
|:-------------:|:-----:|:----:|:---------------:|:------:|:-----:|:------:|:--------------:|:-----------:|:-------:|:---------:|:-------:|
|
| 81 |
+
| 2.3550 | 1.0 | 249 | 2.1560 | 38.15 | 14.35 | 36.85 | 93.81 | 93.01 | 93.39 | 60.2300 | 12.88 |
|
| 82 |
+
| 2.1237 | 2.0 | 498 | 2.0569 | 39.42 | 15.61 | 38.32 | 93.95 | 93.22 | 93.57 | 61.7100 | 13.29 |
|
| 83 |
+
| 1.9450 | 3.0 | 747 | 2.0384 | 40.29 | 16.47 | 39.36 | 93.95 | 93.34 | 93.63 | 61.9700 | 13.73 |
|
| 84 |
+
| 1.8462 | 4.0 | 996 | 2.0324 | 40.33 | 16.88 | 39.44 | 93.94 | 93.39 | 93.66 | 62.3800 | 14.45 |
|
| 85 |
+
| 1.7684 | 5.0 | 1245 | 2.0337 | 41.01 | 17.87 | 40.13 | 94.03 | 93.5 | 93.75 | 63.8500 | 13.95 |
|
| 86 |
+
| 1.6971 | 6.0 | 1494 | 2.0398 | 41.85 | 18.36 | 41.06 | 94.18 | 93.57 | 93.86 | 64.0300 | 13.99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
### Framework versions
|
config.json
CHANGED
|
@@ -3,19 +3,19 @@
|
|
| 3 |
"T5ForConditionalGeneration"
|
| 4 |
],
|
| 5 |
"classifier_dropout": 0.0,
|
| 6 |
-
"d_ff":
|
| 7 |
"d_kv": 64,
|
| 8 |
"d_model": 768,
|
| 9 |
"decoder_start_token_id": 0,
|
| 10 |
-
"dense_act_fn": "
|
| 11 |
"dropout_rate": 0.1,
|
| 12 |
"dtype": "float32",
|
| 13 |
"eos_token_id": 1,
|
| 14 |
-
"feed_forward_proj": "
|
| 15 |
"initializer_factor": 1.0,
|
| 16 |
"is_decoder": false,
|
| 17 |
"is_encoder_decoder": true,
|
| 18 |
-
"is_gated_act":
|
| 19 |
"layer_norm_epsilon": 1e-06,
|
| 20 |
"model_type": "t5",
|
| 21 |
"n_positions": 512,
|
|
|
|
| 3 |
"T5ForConditionalGeneration"
|
| 4 |
],
|
| 5 |
"classifier_dropout": 0.0,
|
| 6 |
+
"d_ff": 3072,
|
| 7 |
"d_kv": 64,
|
| 8 |
"d_model": 768,
|
| 9 |
"decoder_start_token_id": 0,
|
| 10 |
+
"dense_act_fn": "relu",
|
| 11 |
"dropout_rate": 0.1,
|
| 12 |
"dtype": "float32",
|
| 13 |
"eos_token_id": 1,
|
| 14 |
+
"feed_forward_proj": "relu",
|
| 15 |
"initializer_factor": 1.0,
|
| 16 |
"is_decoder": false,
|
| 17 |
"is_encoder_decoder": true,
|
| 18 |
+
"is_gated_act": false,
|
| 19 |
"layer_norm_epsilon": 1e-06,
|
| 20 |
"model_type": "t5",
|
| 21 |
"n_positions": 512,
|
generation_config.json
CHANGED
|
@@ -5,25 +5,26 @@
|
|
| 5 |
"decoder_start_token_id": 0,
|
| 6 |
"diversity_penalty": 0.0,
|
| 7 |
"do_sample": false,
|
| 8 |
-
"early_stopping":
|
| 9 |
"encoder_no_repeat_ngram_size": 0,
|
| 10 |
"encoder_repetition_penalty": 1.0,
|
| 11 |
"eos_token_id": 1,
|
| 12 |
"epsilon_cutoff": 0.0,
|
| 13 |
"eta_cutoff": 0.0,
|
| 14 |
-
"length_penalty": 1
|
| 15 |
"max_length": 20,
|
|
|
|
| 16 |
"min_length": 0,
|
| 17 |
-
"no_repeat_ngram_size":
|
| 18 |
"num_assistant_tokens": 20,
|
| 19 |
"num_assistant_tokens_schedule": "constant",
|
| 20 |
"num_beam_groups": 1,
|
| 21 |
-
"num_beams":
|
| 22 |
"num_return_sequences": 1,
|
| 23 |
"output_scores": false,
|
| 24 |
"pad_token_id": 0,
|
| 25 |
"remove_invalid_values": false,
|
| 26 |
-
"repetition_penalty": 1
|
| 27 |
"return_dict_in_generate": false,
|
| 28 |
"target_lookbehind": 10,
|
| 29 |
"temperature": 1.0,
|
|
|
|
| 5 |
"decoder_start_token_id": 0,
|
| 6 |
"diversity_penalty": 0.0,
|
| 7 |
"do_sample": false,
|
| 8 |
+
"early_stopping": true,
|
| 9 |
"encoder_no_repeat_ngram_size": 0,
|
| 10 |
"encoder_repetition_penalty": 1.0,
|
| 11 |
"eos_token_id": 1,
|
| 12 |
"epsilon_cutoff": 0.0,
|
| 13 |
"eta_cutoff": 0.0,
|
| 14 |
+
"length_penalty": 1,
|
| 15 |
"max_length": 20,
|
| 16 |
+
"max_new_tokens": 64,
|
| 17 |
"min_length": 0,
|
| 18 |
+
"no_repeat_ngram_size": 1,
|
| 19 |
"num_assistant_tokens": 20,
|
| 20 |
"num_assistant_tokens_schedule": "constant",
|
| 21 |
"num_beam_groups": 1,
|
| 22 |
+
"num_beams": 4,
|
| 23 |
"num_return_sequences": 1,
|
| 24 |
"output_scores": false,
|
| 25 |
"pad_token_id": 0,
|
| 26 |
"remove_invalid_values": false,
|
| 27 |
+
"repetition_penalty": 1,
|
| 28 |
"return_dict_in_generate": false,
|
| 29 |
"target_lookbehind": 10,
|
| 30 |
"temperature": 1.0,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3204b176fab0876001da80a4c75ec97f76aa95894a94331d44e381e39188ab7
|
| 3 |
+
size 891558696
|
tokenizer_config.json
CHANGED
|
@@ -105,9 +105,8 @@
|
|
| 105 |
"<extra_id_99>"
|
| 106 |
],
|
| 107 |
"is_local": false,
|
| 108 |
-
"model_max_length":
|
| 109 |
"pad_token": "<pad>",
|
| 110 |
-
"sp_model_kwargs": {},
|
| 111 |
"tokenizer_class": "T5Tokenizer",
|
| 112 |
"unk_token": "<unk>"
|
| 113 |
}
|
|
|
|
| 105 |
"<extra_id_99>"
|
| 106 |
],
|
| 107 |
"is_local": false,
|
| 108 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 109 |
"pad_token": "<pad>",
|
|
|
|
| 110 |
"tokenizer_class": "T5Tokenizer",
|
| 111 |
"unk_token": "<unk>"
|
| 112 |
}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5393
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d93ac8f4afd4e8b5e8f612e558216041aa5d9ee6ffbfa2e3d606c185a8cc015d
|
| 3 |
size 5393
|