Commit
·
a993fba
1
Parent(s):
5c346e9
upd weights: 99% ria, title=36
Browse files
- README.md +5 -6
- config.json +5 -3
- pytorch_model.bin +1 -1
- tokenizer_config.json +1 -1
README.md
CHANGED
|
@@ -12,7 +12,7 @@ license: MIT
|
|
| 12 |
|
| 13 |
## Description
|
| 14 |
*bert2bert* model, initialized with the `DeepPavlov/rubert-base-cased` pretrained weights and
|
| 15 |
-
fine-tuned on the first
|
| 16 |
|
| 17 |
## Usage example
|
| 18 |
|
|
@@ -35,7 +35,7 @@ encoded_batch = tokenizer.prepare_seq2seq_batch(
|
|
| 35 |
|
| 36 |
output_ids = model.generate(
|
| 37 |
input_ids=encoded_batch["input_ids"],
|
| 38 |
-
max_length=
|
| 39 |
no_repeat_ngram_size=3,
|
| 40 |
num_beams=5,
|
| 41 |
top_k=0
|
|
@@ -80,7 +80,6 @@ python nlp_headline_rus/src/train_seq2seq.py \
|
|
| 80 |
|
| 81 |
## Validation results
|
| 82 |
|
| 83 |
-
- Using [last 1% of ria](https://drive.google.com/drive/folders/
|
| 84 |
-
- Using [
|
| 85 |
-
- Using [gazeta_ru
|
| 86 |
-
- Using [gazeta_ru val](https://drive.google.com/drive/folders/1BLiL3H0n56e8Q9jSuDgaH_3LLpmKxuVG) split
|
|
|
|
| 12 |
|
| 13 |
## Description
|
| 14 |
*bert2bert* model, initialized with the `DeepPavlov/rubert-base-cased` pretrained weights and
|
| 15 |
+
fine-tuned on the first 99% of ["Rossiya Segodnya" news dataset](https://github.com/RossiyaSegodnya/ria_news_dataset) for 2 epochs.
|
| 16 |
|
| 17 |
## Usage example
|
| 18 |
|
|
|
|
| 35 |
|
| 36 |
output_ids = model.generate(
|
| 37 |
input_ids=encoded_batch["input_ids"],
|
| 38 |
+
max_length=36,
|
| 39 |
no_repeat_ngram_size=3,
|
| 40 |
num_beams=5,
|
| 41 |
top_k=0
|
|
|
|
| 80 |
|
| 81 |
## Validation results
|
| 82 |
|
| 83 |
+
- Using [last 1% of ria](https://drive.google.com/drive/folders/1ztAeyb1BiLMgXwOgOJS7WMR4PGiI1q92) dataset
|
| 84 |
+
- Using [gazeta_ru test](https://drive.google.com/drive/folders/1CyowuRpecsLTcDbqEfmAvkCWOod58g_e) split
|
| 85 |
+
- Using [gazeta_ru val](https://drive.google.com/drive/folders/1XZFOXHSXLKdhzm61ceVLw3aautrdskIu) split
|
|
|
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "/kaggle/input/
|
| 3 |
"architectures": [
|
| 4 |
"EncoderDecoderModel"
|
| 5 |
],
|
|
@@ -19,6 +19,7 @@
|
|
| 19 |
"diversity_penalty": 0.0,
|
| 20 |
"do_sample": false,
|
| 21 |
"early_stopping": false,
|
|
|
|
| 22 |
"eos_token_id": null,
|
| 23 |
"finetuning_task": null,
|
| 24 |
"gradient_checkpointing": false,
|
|
@@ -74,7 +75,7 @@
|
|
| 74 |
"top_k": 50,
|
| 75 |
"top_p": 1.0,
|
| 76 |
"torchscript": false,
|
| 77 |
-
"transformers_version": "4.
|
| 78 |
"type_vocab_size": 2,
|
| 79 |
"use_bfloat16": false,
|
| 80 |
"use_cache": true,
|
|
@@ -98,6 +99,7 @@
|
|
| 98 |
"diversity_penalty": 0.0,
|
| 99 |
"do_sample": false,
|
| 100 |
"early_stopping": false,
|
|
|
|
| 101 |
"eos_token_id": null,
|
| 102 |
"finetuning_task": null,
|
| 103 |
"gradient_checkpointing": false,
|
|
@@ -153,7 +155,7 @@
|
|
| 153 |
"top_k": 50,
|
| 154 |
"top_p": 1.0,
|
| 155 |
"torchscript": false,
|
| 156 |
-
"transformers_version": "4.
|
| 157 |
"type_vocab_size": 2,
|
| 158 |
"use_bfloat16": false,
|
| 159 |
"use_cache": true,
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/kaggle/input/bert2bert-wexp/ep_1_v9",
|
| 3 |
"architectures": [
|
| 4 |
"EncoderDecoderModel"
|
| 5 |
],
|
|
|
|
| 19 |
"diversity_penalty": 0.0,
|
| 20 |
"do_sample": false,
|
| 21 |
"early_stopping": false,
|
| 22 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 23 |
"eos_token_id": null,
|
| 24 |
"finetuning_task": null,
|
| 25 |
"gradient_checkpointing": false,
|
|
|
|
| 75 |
"top_k": 50,
|
| 76 |
"top_p": 1.0,
|
| 77 |
"torchscript": false,
|
| 78 |
+
"transformers_version": "4.3.2",
|
| 79 |
"type_vocab_size": 2,
|
| 80 |
"use_bfloat16": false,
|
| 81 |
"use_cache": true,
|
|
|
|
| 99 |
"diversity_penalty": 0.0,
|
| 100 |
"do_sample": false,
|
| 101 |
"early_stopping": false,
|
| 102 |
+
"encoder_no_repeat_ngram_size": 0,
|
| 103 |
"eos_token_id": null,
|
| 104 |
"finetuning_task": null,
|
| 105 |
"gradient_checkpointing": false,
|
|
|
|
| 155 |
"top_k": 50,
|
| 156 |
"top_p": 1.0,
|
| 157 |
"torchscript": false,
|
| 158 |
+
"transformers_version": "4.3.2",
|
| 159 |
"type_vocab_size": 2,
|
| 160 |
"use_bfloat16": false,
|
| 161 |
"use_cache": true,
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 827914439
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71f249206ee2da240fc75f3b8d228ceee50861ff493ac0b6437e2509ad2754e0
|
| 3 |
size 827914439
|
tokenizer_config.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": "/kaggle/input/deeppavlov-rubertbasecased/special_tokens_map.json", "name_or_path": "/kaggle/input/
|
|
|
|
| 1 |
+
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": "/kaggle/input/deeppavlov-rubertbasecased/special_tokens_map.json", "name_or_path": "/kaggle/input/bert2bert-wexp/ep_1_v9", "do_basic_tokenize": true, "never_split": null}
|