Update spaCy pipeline
Browse files- README.md +12 -12
- config.cfg +9 -5
- en_tako_query_analyzer-any-py3-none-any.whl +2 -2
- meta.json +21 -21
- ner/model +1 -1
- parser/model +1 -1
- senter/model +1 -1
- tagger/model +0 -0
- textcat_classify/model +1 -1
- textcat_multilabel/model +1 -1
- tok2vec/model +1 -1
- tok2vec_small/model +1 -1
- vocab/key2row +2 -2
- vocab/strings.json +2 -2
- vocab/vectors +2 -2
README.md
CHANGED
|
@@ -25,11 +25,11 @@ model-index:
|
|
| 25 |
| Feature | Description |
|
| 26 |
| --- | --- |
|
| 27 |
| **Name** | `en_tako_query_analyzer` |
|
| 28 |
-
| **Version** | `0.0.
|
| 29 |
-
| **spaCy** | `>=3.
|
| 30 |
| **Default Pipeline** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `tok2vec`, `ner`, `textcat_multilabel`, `textcat_classify` |
|
| 31 |
| **Components** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `tok2vec`, `ner`, `textcat_multilabel`, `textcat_classify` |
|
| 32 |
-
| **Vectors** |
|
| 33 |
| **Sources** | n/a |
|
| 34 |
| **License** | n/a |
|
| 35 |
| **Author** | [n/a]() |
|
|
@@ -58,12 +58,12 @@ model-index:
|
|
| 58 |
| `ENTS_P` | 0.00 |
|
| 59 |
| `ENTS_R` | 0.00 |
|
| 60 |
| `ENTS_PER_TYPE` | 0.00 |
|
| 61 |
-
| `CATS_SCORE` |
|
| 62 |
-
| `CATS_MICRO_P` |
|
| 63 |
-
| `CATS_MICRO_R` |
|
| 64 |
-
| `CATS_MICRO_F` |
|
| 65 |
-
| `CATS_MACRO_P` |
|
| 66 |
-
| `CATS_MACRO_R` |
|
| 67 |
-
| `CATS_MACRO_F` |
|
| 68 |
-
| `CATS_MACRO_AUC` |
|
| 69 |
-
| `TEXTCAT_CLASSIFY_LOSS` |
|
|
|
|
| 25 |
| Feature | Description |
|
| 26 |
| --- | --- |
|
| 27 |
| **Name** | `en_tako_query_analyzer` |
|
| 28 |
+
| **Version** | `0.0.5` |
|
| 29 |
+
| **spaCy** | `>=3.8.11,<3.9.0` |
|
| 30 |
| **Default Pipeline** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `tok2vec`, `ner`, `textcat_multilabel`, `textcat_classify` |
|
| 31 |
| **Components** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `tok2vec`, `ner`, `textcat_multilabel`, `textcat_classify` |
|
| 32 |
+
| **Vectors** | 684830 keys, 342918 unique vectors (300 dimensions) |
|
| 33 |
| **Sources** | n/a |
|
| 34 |
| **License** | n/a |
|
| 35 |
| **Author** | [n/a]() |
|
|
|
|
| 58 |
| `ENTS_P` | 0.00 |
|
| 59 |
| `ENTS_R` | 0.00 |
|
| 60 |
| `ENTS_PER_TYPE` | 0.00 |
|
| 61 |
+
| `CATS_SCORE` | 84.62 |
|
| 62 |
+
| `CATS_MICRO_P` | 83.50 |
|
| 63 |
+
| `CATS_MICRO_R` | 83.50 |
|
| 64 |
+
| `CATS_MICRO_F` | 83.50 |
|
| 65 |
+
| `CATS_MACRO_P` | 84.23 |
|
| 66 |
+
| `CATS_MACRO_R` | 83.50 |
|
| 67 |
+
| `CATS_MACRO_F` | 83.41 |
|
| 68 |
+
| `CATS_MACRO_AUC` | 83.96 |
|
| 69 |
+
| `TEXTCAT_CLASSIFY_LOSS` | 1108.70 |
|
config.cfg
CHANGED
|
@@ -1,13 +1,13 @@
|
|
| 1 |
[paths]
|
| 2 |
-
train = "corpus/filter-train.spacy"
|
| 3 |
-
dev = "corpus/filter-test.spacy"
|
| 4 |
vectors = "en_core_web_lg"
|
| 5 |
init_tok2vec = null
|
| 6 |
|
| 7 |
[variables]
|
| 8 |
wandb_project_name = "tako-query-filter"
|
| 9 |
wandb_team_name = "tako-team"
|
| 10 |
-
base_model = "topic/
|
| 11 |
|
| 12 |
[system]
|
| 13 |
gpu_allocator = "pytorch"
|
|
@@ -296,7 +296,7 @@ project_name = ${variables.wandb_project_name}
|
|
| 296 |
remove_config_values = []
|
| 297 |
model_log_interval = null
|
| 298 |
log_dataset_dir = null
|
| 299 |
-
entity =
|
| 300 |
run_name = null
|
| 301 |
log_best_dir = null
|
| 302 |
log_latest_dir = null
|
|
@@ -315,6 +315,10 @@ learn_rate = 0.001
|
|
| 315 |
|
| 316 |
[training.score_weights]
|
| 317 |
tag_acc = 0.25
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
dep_uas = 0.12
|
| 319 |
dep_las = 0.12
|
| 320 |
dep_las_per_type = null
|
|
@@ -353,7 +357,7 @@ positive_label = "ACCEPT"
|
|
| 353 |
|
| 354 |
[initialize.components.textcat_classify.labels]
|
| 355 |
@readers = "spacy.read_labels.v1"
|
| 356 |
-
path = "corpus/labels/filter-labels/textcat_classify.json"
|
| 357 |
require = false
|
| 358 |
|
| 359 |
[initialize.tokenizer]
|
|
|
|
| 1 |
[paths]
|
| 2 |
+
train = "/Users/noahjax/Work/tako-entity-extractor/corpus/filter-train.spacy"
|
| 3 |
+
dev = "/Users/noahjax/Work/tako-entity-extractor/corpus/filter-test.spacy"
|
| 4 |
vectors = "en_core_web_lg"
|
| 5 |
init_tok2vec = null
|
| 6 |
|
| 7 |
[variables]
|
| 8 |
wandb_project_name = "tako-query-filter"
|
| 9 |
wandb_team_name = "tako-team"
|
| 10 |
+
base_model = "/Users/noahjax/Work/tako-entity-extractor/training/topic/model-best"
|
| 11 |
|
| 12 |
[system]
|
| 13 |
gpu_allocator = "pytorch"
|
|
|
|
| 296 |
remove_config_values = []
|
| 297 |
model_log_interval = null
|
| 298 |
log_dataset_dir = null
|
| 299 |
+
entity = ${variables.wandb_team_name}
|
| 300 |
run_name = null
|
| 301 |
log_best_dir = null
|
| 302 |
log_latest_dir = null
|
|
|
|
| 315 |
|
| 316 |
[training.score_weights]
|
| 317 |
tag_acc = 0.25
|
| 318 |
+
pos_acc = 0.0
|
| 319 |
+
tag_micro_p = null
|
| 320 |
+
tag_micro_r = null
|
| 321 |
+
tag_micro_f = null
|
| 322 |
dep_uas = 0.12
|
| 323 |
dep_las = 0.12
|
| 324 |
dep_las_per_type = null
|
|
|
|
| 357 |
|
| 358 |
[initialize.components.textcat_classify.labels]
|
| 359 |
@readers = "spacy.read_labels.v1"
|
| 360 |
+
path = "/Users/noahjax/Work/tako-entity-extractor/corpus/labels/filter-labels/textcat_classify.json"
|
| 361 |
require = false
|
| 362 |
|
| 363 |
[initialize.tokenizer]
|
en_tako_query_analyzer-any-py3-none-any.whl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b34711c5f8f8e7d2de88f759dbaf91c708027038921877018f9a56bd186711d
|
| 3 |
+
size 462333461
|
meta.json
CHANGED
|
@@ -1,18 +1,18 @@
|
|
| 1 |
{
|
| 2 |
"lang":"en",
|
| 3 |
"name":"tako_query_analyzer",
|
| 4 |
-
"version":"0.0.
|
| 5 |
"description":"",
|
| 6 |
"author":"",
|
| 7 |
"email":"",
|
| 8 |
"url":"",
|
| 9 |
"license":"",
|
| 10 |
-
"spacy_version":">=3.
|
| 11 |
-
"spacy_git_version":"
|
| 12 |
"vectors":{
|
| 13 |
"width":300,
|
| 14 |
-
"vectors":
|
| 15 |
-
"keys":
|
| 16 |
"name":"en_vectors"
|
| 17 |
},
|
| 18 |
"labels":{
|
|
@@ -190,30 +190,30 @@
|
|
| 190 |
"ents_p":0.0,
|
| 191 |
"ents_r":0.0,
|
| 192 |
"ents_per_type":0.0,
|
| 193 |
-
"cats_score":0.
|
| 194 |
"cats_score_desc":"F (ACCEPT)",
|
| 195 |
-
"cats_micro_p":0.
|
| 196 |
-
"cats_micro_r":0.
|
| 197 |
-
"cats_micro_f":0.
|
| 198 |
-
"cats_macro_p":0.
|
| 199 |
-
"cats_macro_r":0.
|
| 200 |
-
"cats_macro_f":0.
|
| 201 |
-
"cats_macro_auc":0.
|
| 202 |
"cats_f_per_type":{
|
| 203 |
"ACCEPT":{
|
| 204 |
-
"p":0.
|
| 205 |
-
"r":0.
|
| 206 |
-
"f":0.
|
| 207 |
},
|
| 208 |
"REJECT":{
|
| 209 |
-
"p":0.
|
| 210 |
-
"r":0.
|
| 211 |
-
"f":0.
|
| 212 |
}
|
| 213 |
},
|
| 214 |
-
"textcat_classify_loss":
|
| 215 |
},
|
| 216 |
"requirements":[
|
| 217 |
-
|
| 218 |
]
|
| 219 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"lang":"en",
|
| 3 |
"name":"tako_query_analyzer",
|
| 4 |
+
"version":"0.0.5",
|
| 5 |
"description":"",
|
| 6 |
"author":"",
|
| 7 |
"email":"",
|
| 8 |
"url":"",
|
| 9 |
"license":"",
|
| 10 |
+
"spacy_version":">=3.8.11,<3.9.0",
|
| 11 |
+
"spacy_git_version":"e7a662a",
|
| 12 |
"vectors":{
|
| 13 |
"width":300,
|
| 14 |
+
"vectors":342918,
|
| 15 |
+
"keys":684830,
|
| 16 |
"name":"en_vectors"
|
| 17 |
},
|
| 18 |
"labels":{
|
|
|
|
| 190 |
"ents_p":0.0,
|
| 191 |
"ents_r":0.0,
|
| 192 |
"ents_per_type":0.0,
|
| 193 |
+
"cats_score":0.8462255359,
|
| 194 |
"cats_score_desc":"F (ACCEPT)",
|
| 195 |
+
"cats_micro_p":0.835,
|
| 196 |
+
"cats_micro_r":0.835,
|
| 197 |
+
"cats_micro_f":0.835,
|
| 198 |
+
"cats_macro_p":0.8422963898,
|
| 199 |
+
"cats_macro_r":0.835,
|
| 200 |
+
"cats_macro_f":0.8341160042,
|
| 201 |
+
"cats_macro_auc":0.83958575,
|
| 202 |
"cats_f_per_type":{
|
| 203 |
"ACCEPT":{
|
| 204 |
+
"p":0.7923211169,
|
| 205 |
+
"r":0.908,
|
| 206 |
+
"f":0.8462255359
|
| 207 |
},
|
| 208 |
"REJECT":{
|
| 209 |
+
"p":0.8922716628,
|
| 210 |
+
"r":0.762,
|
| 211 |
+
"f":0.8220064725
|
| 212 |
}
|
| 213 |
},
|
| 214 |
+
"textcat_classify_loss":11.0869617185
|
| 215 |
},
|
| 216 |
"requirements":[
|
| 217 |
+
"spacy>=3.8.11,<3.9.0"
|
| 218 |
]
|
| 219 |
}
|
ner/model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2715735
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6c629c28cd04f736632f4c75ee41c7b45439a38dc77b72bd20b2c6dd4fcd5f1
|
| 3 |
size 2715735
|
parser/model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 319909
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e0098910535a863d430082ece57455d5fa071cd4ca3a0054e80582076c752b1e
|
| 3 |
size 319909
|
senter/model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 197089
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b69e7174a0b6307739adec3c8053ac45fdbad7171c4d3d8bfcd9713ecbc01f8c
|
| 3 |
size 197089
|
tagger/model
CHANGED
|
Binary files a/tagger/model and b/tagger/model differ
|
|
|
textcat_classify/model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36071049
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8fdfd1bf415ae90c99273cb57c6a7ab8e1d430865ff0946c5f3fde3d827c78f9
|
| 3 |
size 36071049
|
textcat_multilabel/model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15382175
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9cf2c46c7490c9aa43b83356e1b2cec3e2db257ad4b36cd22c61ea92d9254fd
|
| 3 |
size 15382175
|
tok2vec/model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 28290008
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba7649d58640d75ed1b18f347ab2c0dedf316e46840afcb898b75d6db692239b
|
| 3 |
size 28290008
|
tok2vec_small/model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6269370
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e84fc06eb319c94d28e460fc334e292120b01f18baa5dc8b50c977459820a090
|
| 3 |
size 6269370
|
vocab/key2row
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8163b927a234a675074bb38ce62c17a57182998dc83fb9275d35500559a582a
|
| 3 |
+
size 9311659
|
vocab/strings.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed26b6bae75e5f9901b6d7c87c8293d75a292d81814631f55d0b046d9857888d
|
| 3 |
+
size 10042571
|
vocab/vectors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd82f972c4fca3d440c505cdd94c88efdded56457cc86851d584b751f7dea673
|
| 3 |
+
size 411501728
|