Update spaCy pipeline
Browse files
- config.cfg +83 -4
- en_tako_query_analyzer-any-py3-none-any.whl +2 -2
- lemmatizer/lookups/lookups.bin +3 -0
- meta.json +5 -0
config.cfg
CHANGED
|
@@ -15,7 +15,7 @@ seed = 0
|
|
| 15 |
|
| 16 |
[nlp]
|
| 17 |
lang = "en"
|
| 18 |
-
pipeline = ["tok2vec","ner","textcat_classify"]
|
| 19 |
batch_size = 1000
|
| 20 |
disabled = []
|
| 21 |
before_creation = null
|
|
@@ -26,6 +26,18 @@ vectors = {"@vectors":"spacy.Vectors.v1"}
|
|
| 26 |
|
| 27 |
[components]
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
[components.ner]
|
| 30 |
factory = "ner"
|
| 31 |
incorrect_spans_key = null
|
|
@@ -47,6 +59,45 @@ nO = null
|
|
| 47 |
width = 256
|
| 48 |
upstream = "*"
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
[components.textcat_classify]
|
| 51 |
factory = "weighted_textcat"
|
| 52 |
class_weights = [0.67,0.33]
|
|
@@ -102,6 +153,26 @@ window_size = 1
|
|
| 102 |
maxout_pieces = 3
|
| 103 |
depth = 8
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
[corpora]
|
| 106 |
|
| 107 |
[corpora.dev]
|
|
@@ -189,14 +260,22 @@ eps = 0.00000001
|
|
| 189 |
learn_rate = 0.001
|
| 190 |
|
| 191 |
[training.score_weights]
|
| 192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
ents_p = 0.0
|
| 194 |
ents_r = 0.0
|
| 195 |
ents_per_type = null
|
| 196 |
-
cats_score = 0.
|
| 197 |
cats_score_desc = null
|
| 198 |
cats_micro_p = null
|
| 199 |
-
cats_micro_r = 0.
|
| 200 |
cats_micro_f = null
|
| 201 |
cats_macro_p = null
|
| 202 |
cats_macro_r = null
|
|
|
|
| 15 |
|
| 16 |
[nlp]
|
| 17 |
lang = "en"
|
| 18 |
+
pipeline = ["tok2vec_small","tagger","parser","attribute_ruler","lemmatizer","tok2vec","ner","textcat_classify"]
|
| 19 |
batch_size = 1000
|
| 20 |
disabled = []
|
| 21 |
before_creation = null
|
|
|
|
| 26 |
|
| 27 |
[components]
|
| 28 |
|
| 29 |
+
[components.attribute_ruler]
|
| 30 |
+
factory = "attribute_ruler"
|
| 31 |
+
scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
|
| 32 |
+
validate = false
|
| 33 |
+
|
| 34 |
+
[components.lemmatizer]
|
| 35 |
+
factory = "lemmatizer"
|
| 36 |
+
mode = "rule"
|
| 37 |
+
model = null
|
| 38 |
+
overwrite = false
|
| 39 |
+
scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
|
| 40 |
+
|
| 41 |
[components.ner]
|
| 42 |
factory = "ner"
|
| 43 |
incorrect_spans_key = null
|
|
|
|
| 59 |
width = 256
|
| 60 |
upstream = "*"
|
| 61 |
|
| 62 |
+
[components.parser]
|
| 63 |
+
factory = "parser"
|
| 64 |
+
learn_tokens = false
|
| 65 |
+
min_action_freq = 30
|
| 66 |
+
moves = null
|
| 67 |
+
scorer = {"@scorers":"spacy.parser_scorer.v1"}
|
| 68 |
+
update_with_oracle_cut_size = 100
|
| 69 |
+
|
| 70 |
+
[components.parser.model]
|
| 71 |
+
@architectures = "spacy.TransitionBasedParser.v2"
|
| 72 |
+
state_type = "parser"
|
| 73 |
+
extra_state_tokens = false
|
| 74 |
+
hidden_width = 64
|
| 75 |
+
maxout_pieces = 2
|
| 76 |
+
use_upper = true
|
| 77 |
+
nO = null
|
| 78 |
+
|
| 79 |
+
[components.parser.model.tok2vec]
|
| 80 |
+
@architectures = "spacy.Tok2VecListener.v1"
|
| 81 |
+
width = 96
|
| 82 |
+
upstream = "tok2vec"
|
| 83 |
+
|
| 84 |
+
[components.tagger]
|
| 85 |
+
factory = "tagger"
|
| 86 |
+
label_smoothing = 0.0
|
| 87 |
+
neg_prefix = "!"
|
| 88 |
+
overwrite = false
|
| 89 |
+
scorer = {"@scorers":"spacy.tagger_scorer.v1"}
|
| 90 |
+
|
| 91 |
+
[components.tagger.model]
|
| 92 |
+
@architectures = "spacy.Tagger.v2"
|
| 93 |
+
nO = null
|
| 94 |
+
normalize = false
|
| 95 |
+
|
| 96 |
+
[components.tagger.model.tok2vec]
|
| 97 |
+
@architectures = "spacy.Tok2VecListener.v1"
|
| 98 |
+
width = 96
|
| 99 |
+
upstream = "tok2vec"
|
| 100 |
+
|
| 101 |
[components.textcat_classify]
|
| 102 |
factory = "weighted_textcat"
|
| 103 |
class_weights = [0.67,0.33]
|
|
|
|
| 153 |
maxout_pieces = 3
|
| 154 |
depth = 8
|
| 155 |
|
| 156 |
+
[components.tok2vec_small]
|
| 157 |
+
factory = "tok2vec"
|
| 158 |
+
|
| 159 |
+
[components.tok2vec_small.model]
|
| 160 |
+
@architectures = "spacy.Tok2Vec.v2"
|
| 161 |
+
|
| 162 |
+
[components.tok2vec_small.model.embed]
|
| 163 |
+
@architectures = "spacy.MultiHashEmbed.v2"
|
| 164 |
+
width = 96
|
| 165 |
+
attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY","IS_SPACE"]
|
| 166 |
+
rows = [5000,1000,2500,2500,50,50]
|
| 167 |
+
include_static_vectors = false
|
| 168 |
+
|
| 169 |
+
[components.tok2vec_small.model.encode]
|
| 170 |
+
@architectures = "spacy.MaxoutWindowEncoder.v2"
|
| 171 |
+
width = 96
|
| 172 |
+
depth = 4
|
| 173 |
+
window_size = 1
|
| 174 |
+
maxout_pieces = 3
|
| 175 |
+
|
| 176 |
[corpora]
|
| 177 |
|
| 178 |
[corpora.dev]
|
|
|
|
| 260 |
learn_rate = 0.001
|
| 261 |
|
| 262 |
[training.score_weights]
|
| 263 |
+
tag_acc = 0.25
|
| 264 |
+
dep_uas = 0.12
|
| 265 |
+
dep_las = 0.12
|
| 266 |
+
dep_las_per_type = null
|
| 267 |
+
sents_p = null
|
| 268 |
+
sents_r = null
|
| 269 |
+
sents_f = 0.0
|
| 270 |
+
lemma_acc = 0.25
|
| 271 |
+
ents_f = 0.12
|
| 272 |
ents_p = 0.0
|
| 273 |
ents_r = 0.0
|
| 274 |
ents_per_type = null
|
| 275 |
+
cats_score = 0.06
|
| 276 |
cats_score_desc = null
|
| 277 |
cats_micro_p = null
|
| 278 |
+
cats_micro_r = 0.06
|
| 279 |
cats_micro_f = null
|
| 280 |
cats_macro_p = null
|
| 281 |
cats_macro_r = null
|
en_tako_query_analyzer-any-py3-none-any.whl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d276080166d535d192f52fef9138c4678a32445a14b0c27f9a43c974be5c3aca
|
| 3 |
+
size 619963181
|
lemmatizer/lookups/lookups.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb64f40c0f8396d1762730c0ddf4dad2a52d138f5a389f71a1a1d088173b7737
|
| 3 |
+
size 972893
|
meta.json
CHANGED
|
@@ -120,6 +120,9 @@
|
|
| 120 |
],
|
| 121 |
"attribute_ruler":[
|
| 122 |
|
|
|
|
|
|
|
|
|
|
| 123 |
],
|
| 124 |
"tok2vec":[
|
| 125 |
|
|
@@ -155,6 +158,7 @@
|
|
| 155 |
"tagger",
|
| 156 |
"parser",
|
| 157 |
"attribute_ruler",
|
|
|
|
| 158 |
"tok2vec",
|
| 159 |
"ner",
|
| 160 |
"textcat_classify"
|
|
@@ -164,6 +168,7 @@
|
|
| 164 |
"tagger",
|
| 165 |
"parser",
|
| 166 |
"attribute_ruler",
|
|
|
|
| 167 |
"tok2vec",
|
| 168 |
"ner",
|
| 169 |
"textcat_classify"
|
|
|
|
| 120 |
],
|
| 121 |
"attribute_ruler":[
|
| 122 |
|
| 123 |
+
],
|
| 124 |
+
"lemmatizer":[
|
| 125 |
+
|
| 126 |
],
|
| 127 |
"tok2vec":[
|
| 128 |
|
|
|
|
| 158 |
"tagger",
|
| 159 |
"parser",
|
| 160 |
"attribute_ruler",
|
| 161 |
+
"lemmatizer",
|
| 162 |
"tok2vec",
|
| 163 |
"ner",
|
| 164 |
"textcat_classify"
|
|
|
|
| 168 |
"tagger",
|
| 169 |
"parser",
|
| 170 |
"attribute_ruler",
|
| 171 |
+
"lemmatizer",
|
| 172 |
"tok2vec",
|
| 173 |
"ner",
|
| 174 |
"textcat_classify"
|