File size: 2,846 Bytes
34c86f9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
[paths]
train = "voice_assistant/data/entity_data/train.spacy"
dev = "voice_assistant/data/entity_data/dev.spacy"
vectors = null
init_tok2vec = null
[system]
seed = 42
gpu_allocator = "pytorch"
[nlp]
lang = "en"
pipeline = ["transformer","ner"]
batch_size = 16
disabled = []
before_creation = null
after_creation = null
after_pipeline_creation = null
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
vectors = {"@vectors":"spacy.Vectors.v1"}
[components]
[components.ner]
factory = "ner"
incorrect_spans_key = null
moves = null
scorer = {"@scorers":"spacy.ner_scorer.v1"}
update_with_oracle_cut_size = 100
[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "ner"
extra_state_tokens = false
hidden_width = 128
maxout_pieces = 2
use_upper = false
nO = null
[components.ner.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
upstream = "transformer"
pooling = {"@layers":"reduce_mean.v1"}
grad_factor = 1.0
[components.transformer]
factory = "transformer"
max_batch_items = 1024
set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
[components.transformer.model]
@architectures = "spacy-transformers.TransformerModel.v3"
name = "roberta-base"
get_spans = {"@span_getters":"spacy-transformers.doc_spans.v1"}
mixed_precision = true
[components.transformer.model.grad_scaler_config]
[components.transformer.model.tokenizer_config]
use_fast = true
[components.transformer.model.transformer_config]
[corpora]
[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}
gold_preproc = false
max_length = 0
limit = 0
augmenter = null
[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}
gold_preproc = false
max_length = 0
limit = 0
augmenter = null
[training]
train_corpus = "corpora.train"
dev_corpus = "corpora.dev"
seed = 42
gpu_allocator = "pytorch"
dropout = 0.15
max_epochs = 600
patience = 300
eval_frequency = 200
accumulate_gradient = 1
max_steps = 6000
frozen_components = []
annotating_components = []
before_to_disk = null
before_update = null
[training.batcher]
@batchers = "spacy.batch_by_words.v1"
discard_oversize = true
tolerance = 0.2
size = 2048
get_length = null
[training.logger]
@loggers = "spacy.MLflowLogger.v1"
run_id = null
experiment_id = null
run_name = null
nested = false
tags = null
remove_config_values = []
[training.optimizer]
@optimizers = "Adam.v1"
learn_rate = 0.0001
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
beta1 = 0.9
beta2 = 0.999
eps = 0.00000001
use_averages = false
[training.score_weights]
ents_f = 1.0
ents_p = 0.0
ents_r = 0.0
ents_per_type = null
[pretraining]
[initialize]
vectors = ${paths.vectors}
init_tok2vec = ${paths.init_tok2vec}
vocab_data = null
lookups = null
before_init = null
after_init = null
[initialize.components]
[initialize.tokenizer] |