Commit ·
72546dd
1
Parent(s): 99f5b49
mass config
Browse files
- mass-base-uncased-config.json +2 -2
- mass_for_generation.ini +50 -0
mass-base-uncased-config.json
CHANGED
|
@@ -9,7 +9,7 @@
|
|
| 9 |
"decoder_embed_dim": 768,
|
| 10 |
"decoder_ffn_embed_dim": 3072,
|
| 11 |
"decoder_layers": 6,
|
| 12 |
-
"decoder_attention_heads":
|
| 13 |
"decoder_normalize_before": 0,
|
| 14 |
"attention_dropout": 0.1,
|
| 15 |
"activation_dropout": 0.1,
|
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"decoder_output_dim": 768,
|
| 25 |
"decoder_input_dim": 768,
|
| 26 |
"no_scale_embedding": 0,
|
| 27 |
-
"adaptive_input":
|
| 28 |
"tie_adaptive_weights": 0,
|
| 29 |
"layernorm_embedding": 1,
|
| 30 |
"encoder_layerdrop": 0,
|
|
|
|
| 9 |
"decoder_embed_dim": 768,
|
| 10 |
"decoder_ffn_embed_dim": 3072,
|
| 11 |
"decoder_layers": 6,
|
| 12 |
+
"decoder_attention_heads": 12,
|
| 13 |
"decoder_normalize_before": 0,
|
| 14 |
"attention_dropout": 0.1,
|
| 15 |
"activation_dropout": 0.1,
|
|
|
|
| 24 |
"decoder_output_dim": 768,
|
| 25 |
"decoder_input_dim": 768,
|
| 26 |
"no_scale_embedding": 0,
|
| 27 |
+
"adaptive_input": 0,
|
| 28 |
"tie_adaptive_weights": 0,
|
| 29 |
"layernorm_embedding": 1,
|
| 30 |
"encoder_layerdrop": 0,
|
mass_for_generation.ini
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[core/auto]
|
| 2 |
+
task_type = core/auto/supervised_task
|
| 3 |
+
cache_dir = ./cache
|
| 4 |
+
|
| 5 |
+
# model
|
| 6 |
+
[core/model/mass]
|
| 7 |
+
pretrained_name = mass-base-uncased
|
| 8 |
+
num_beams = 5
|
| 9 |
+
no_repeat_ngram_size = 0
|
| 10 |
+
max_gen_seq_length = 15
|
| 11 |
+
repetition_penalty = 1.0
|
| 12 |
+
|
| 13 |
+
# dataset
|
| 14 |
+
[core/dataset]
|
| 15 |
+
# data columns: id, num, query, doc, label, score
|
| 16 |
+
data_name = fuliucansheng/data_for_test
|
| 17 |
+
|
| 18 |
+
[core/dataset/train]
|
| 19 |
+
preprocess_funcs = ['core/process/mass_for_generation(query, doc)']
|
| 20 |
+
|
| 21 |
+
[core/dataset/dev]
|
| 22 |
+
preprocess_funcs = ['core/process/mass_for_tokens(query)', 'core/process/mass_for_next_tokens(doc)']
|
| 23 |
+
|
| 24 |
+
[core/dataset/test]
|
| 25 |
+
preprocess_funcs = ['core/process/mass_for_tokens(query)']
|
| 26 |
+
|
| 27 |
+
# process
|
| 28 |
+
[core/process/mass]
|
| 29 |
+
pretrained_name = mass-base-uncased
|
| 30 |
+
max_seq_length = 24
|
| 31 |
+
max_gen_seq_length = 15
|
| 32 |
+
|
| 33 |
+
# task
|
| 34 |
+
[core/auto/supervised_task]
|
| 35 |
+
model = core/model/mass_for_generation
|
| 36 |
+
optim = core/optim/adam
|
| 37 |
+
dataset = core/dataset/auto
|
| 38 |
+
loss_fn = core/loss/lm
|
| 39 |
+
score_fn = core/score/bleu
|
| 40 |
+
monitor_fns = ['core/score/bleu', 'core/score/rouge1', 'core/score/rouge2', 'core/score/rougel']
|
| 41 |
+
output_header = query,doc
|
| 42 |
+
post_process_fn = partial(core/process/mass_for_decode)
|
| 43 |
+
|
| 44 |
+
opt_fp16 = O1
|
| 45 |
+
from_ckpt_dir = ${core/auto:cache_dir}
|
| 46 |
+
to_ckpt_dir = ${core/auto:cache_dir}
|
| 47 |
+
output_path = ${core/auto:cache_dir}/output.txt
|
| 48 |
+
train_batch_size = 128
|
| 49 |
+
dev_batch_size = 128
|
| 50 |
+
test_batch_size = 256
|