noahjax committed on
Commit
6576be0
·
verified ·
1 Parent(s): a71e3f4

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -25,11 +25,11 @@ model-index:
25
  | Feature | Description |
26
  | --- | --- |
27
  | **Name** | `en_tako_query_analyzer` |
28
- | **Version** | `0.0.4` |
29
- | **spaCy** | `>=3.7.5,<3.8.0` |
30
  | **Default Pipeline** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `tok2vec`, `ner`, `textcat_multilabel`, `textcat_classify` |
31
  | **Components** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `tok2vec`, `ner`, `textcat_multilabel`, `textcat_classify` |
32
- | **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
33
  | **Sources** | n/a |
34
  | **License** | n/a |
35
  | **Author** | [n/a]() |
@@ -58,12 +58,12 @@ model-index:
58
  | `ENTS_P` | 0.00 |
59
  | `ENTS_R` | 0.00 |
60
  | `ENTS_PER_TYPE` | 0.00 |
61
- | `CATS_SCORE` | 82.56 |
62
- | `CATS_MICRO_P` | 82.30 |
63
- | `CATS_MICRO_R` | 82.30 |
64
- | `CATS_MICRO_F` | 82.30 |
65
- | `CATS_MACRO_P` | 82.33 |
66
- | `CATS_MACRO_R` | 82.30 |
67
- | `CATS_MACRO_F` | 82.30 |
68
- | `CATS_MACRO_AUC` | 89.93 |
69
- | `TEXTCAT_CLASSIFY_LOSS` | 218.20 |
 
25
  | Feature | Description |
26
  | --- | --- |
27
  | **Name** | `en_tako_query_analyzer` |
28
+ | **Version** | `0.0.5` |
29
+ | **spaCy** | `>=3.8.11,<3.9.0` |
30
  | **Default Pipeline** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `tok2vec`, `ner`, `textcat_multilabel`, `textcat_classify` |
31
  | **Components** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `tok2vec`, `ner`, `textcat_multilabel`, `textcat_classify` |
32
+ | **Vectors** | 684830 keys, 342918 unique vectors (300 dimensions) |
33
  | **Sources** | n/a |
34
  | **License** | n/a |
35
  | **Author** | [n/a]() |
 
58
  | `ENTS_P` | 0.00 |
59
  | `ENTS_R` | 0.00 |
60
  | `ENTS_PER_TYPE` | 0.00 |
61
+ | `CATS_SCORE` | 84.62 |
62
+ | `CATS_MICRO_P` | 83.50 |
63
+ | `CATS_MICRO_R` | 83.50 |
64
+ | `CATS_MICRO_F` | 83.50 |
65
+ | `CATS_MACRO_P` | 84.23 |
66
+ | `CATS_MACRO_R` | 83.50 |
67
+ | `CATS_MACRO_F` | 83.41 |
68
+ | `CATS_MACRO_AUC` | 83.96 |
69
+ | `TEXTCAT_CLASSIFY_LOSS` | 1108.70 |
config.cfg CHANGED
@@ -1,13 +1,13 @@
1
  [paths]
2
- train = "corpus/filter-train.spacy"
3
- dev = "corpus/filter-test.spacy"
4
  vectors = "en_core_web_lg"
5
  init_tok2vec = null
6
 
7
  [variables]
8
  wandb_project_name = "tako-query-filter"
9
  wandb_team_name = "tako-team"
10
- base_model = "topic/upbeat-bush"
11
 
12
  [system]
13
  gpu_allocator = "pytorch"
@@ -296,7 +296,7 @@ project_name = ${variables.wandb_project_name}
296
  remove_config_values = []
297
  model_log_interval = null
298
  log_dataset_dir = null
299
- entity = null
300
  run_name = null
301
  log_best_dir = null
302
  log_latest_dir = null
@@ -315,6 +315,10 @@ learn_rate = 0.001
315
 
316
  [training.score_weights]
317
  tag_acc = 0.25
 
 
 
 
318
  dep_uas = 0.12
319
  dep_las = 0.12
320
  dep_las_per_type = null
@@ -353,7 +357,7 @@ positive_label = "ACCEPT"
353
 
354
  [initialize.components.textcat_classify.labels]
355
  @readers = "spacy.read_labels.v1"
356
- path = "corpus/labels/filter-labels/textcat_classify.json"
357
  require = false
358
 
359
  [initialize.tokenizer]
 
1
  [paths]
2
+ train = "/Users/noahjax/Work/tako-entity-extractor/corpus/filter-train.spacy"
3
+ dev = "/Users/noahjax/Work/tako-entity-extractor/corpus/filter-test.spacy"
4
  vectors = "en_core_web_lg"
5
  init_tok2vec = null
6
 
7
  [variables]
8
  wandb_project_name = "tako-query-filter"
9
  wandb_team_name = "tako-team"
10
+ base_model = "/Users/noahjax/Work/tako-entity-extractor/training/topic/model-best"
11
 
12
  [system]
13
  gpu_allocator = "pytorch"
 
296
  remove_config_values = []
297
  model_log_interval = null
298
  log_dataset_dir = null
299
+ entity = ${variables.wandb_team_name}
300
  run_name = null
301
  log_best_dir = null
302
  log_latest_dir = null
 
315
 
316
  [training.score_weights]
317
  tag_acc = 0.25
318
+ pos_acc = 0.0
319
+ tag_micro_p = null
320
+ tag_micro_r = null
321
+ tag_micro_f = null
322
  dep_uas = 0.12
323
  dep_las = 0.12
324
  dep_las_per_type = null
 
357
 
358
  [initialize.components.textcat_classify.labels]
359
  @readers = "spacy.read_labels.v1"
360
+ path = "/Users/noahjax/Work/tako-entity-extractor/corpus/labels/filter-labels/textcat_classify.json"
361
  require = false
362
 
363
  [initialize.tokenizer]
en_tako_query_analyzer-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8de7f18d2cc25ababfa5d8131bb5a3d3e4ba8415066bf97493f4b388da4c6e9
3
- size 649860398
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b34711c5f8f8e7d2de88f759dbaf91c708027038921877018f9a56bd186711d
3
+ size 462333461
meta.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "lang":"en",
3
  "name":"tako_query_analyzer",
4
- "version":"0.0.4",
5
  "description":"",
6
  "author":"",
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
- "spacy_version":">=3.7.5,<3.8.0",
11
- "spacy_git_version":"a6d0fc360",
12
  "vectors":{
13
  "width":300,
14
- "vectors":514157,
15
- "keys":514157,
16
  "name":"en_vectors"
17
  },
18
  "labels":{
@@ -190,30 +190,30 @@
190
  "ents_p":0.0,
191
  "ents_r":0.0,
192
  "ents_per_type":0.0,
193
- "cats_score":0.8256157635,
194
  "cats_score_desc":"F (ACCEPT)",
195
- "cats_micro_p":0.823,
196
- "cats_micro_r":0.823,
197
- "cats_micro_f":0.823,
198
- "cats_macro_p":0.8232909619,
199
- "cats_macro_r":0.823,
200
- "cats_macro_f":0.822960166,
201
- "cats_macro_auc":0.899297,
202
  "cats_f_per_type":{
203
  "ACCEPT":{
204
- "p":0.813592233,
205
- "r":0.838,
206
- "f":0.8256157635
207
  },
208
  "REJECT":{
209
- "p":0.8329896907,
210
- "r":0.808,
211
- "f":0.8203045685
212
  }
213
  },
214
- "textcat_classify_loss":2.1820269685
215
  },
216
  "requirements":[
217
-
218
  ]
219
  }
 
1
  {
2
  "lang":"en",
3
  "name":"tako_query_analyzer",
4
+ "version":"0.0.5",
5
  "description":"",
6
  "author":"",
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
+ "spacy_version":">=3.8.11,<3.9.0",
11
+ "spacy_git_version":"e7a662a",
12
  "vectors":{
13
  "width":300,
14
+ "vectors":342918,
15
+ "keys":684830,
16
  "name":"en_vectors"
17
  },
18
  "labels":{
 
190
  "ents_p":0.0,
191
  "ents_r":0.0,
192
  "ents_per_type":0.0,
193
+ "cats_score":0.8462255359,
194
  "cats_score_desc":"F (ACCEPT)",
195
+ "cats_micro_p":0.835,
196
+ "cats_micro_r":0.835,
197
+ "cats_micro_f":0.835,
198
+ "cats_macro_p":0.8422963898,
199
+ "cats_macro_r":0.835,
200
+ "cats_macro_f":0.8341160042,
201
+ "cats_macro_auc":0.83958575,
202
  "cats_f_per_type":{
203
  "ACCEPT":{
204
+ "p":0.7923211169,
205
+ "r":0.908,
206
+ "f":0.8462255359
207
  },
208
  "REJECT":{
209
+ "p":0.8922716628,
210
+ "r":0.762,
211
+ "f":0.8220064725
212
  }
213
  },
214
+ "textcat_classify_loss":11.0869617185
215
  },
216
  "requirements":[
217
+ "spacy>=3.8.11,<3.9.0"
218
  ]
219
  }
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a18b22cda1f4ebaa4786ba94b7baa9379ecca5475386ddd9c8b258717defc3a2
3
  size 2715735
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6c629c28cd04f736632f4c75ee41c7b45439a38dc77b72bd20b2c6dd4fcd5f1
3
  size 2715735
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1836fbc02b3924b2fd5f65325c58ae852ff112db1090ca724e5a801e68b85fd
3
  size 319909
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0098910535a863d430082ece57455d5fa071cd4ca3a0054e80582076c752b1e
3
  size 319909
senter/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e62c2504dcdc66144b30c048100af05f12207f933f6d669982b947ae71ffdeef
3
  size 197089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b69e7174a0b6307739adec3c8053ac45fdbad7171c4d3d8bfcd9713ecbc01f8c
3
  size 197089
tagger/model CHANGED
Binary files a/tagger/model and b/tagger/model differ
 
textcat_classify/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43f147a53892c746a8f24180b5870c414902c640b10f9ca2927ed2b91a8bf5fd
3
  size 36071049
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fdfd1bf415ae90c99273cb57c6a7ab8e1d430865ff0946c5f3fde3d827c78f9
3
  size 36071049
textcat_multilabel/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c238f141aa54e070622e6506599093673815490619c560feee160d93c37a502a
3
  size 15382175
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9cf2c46c7490c9aa43b83356e1b2cec3e2db257ad4b36cd22c61ea92d9254fd
3
  size 15382175
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8f59ffdaeb93abafba597c7089b697ab873950f84b9b9bb5fe16022de2c9702
3
  size 28290008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba7649d58640d75ed1b18f347ab2c0dedf316e46840afcb898b75d6db692239b
3
  size 28290008
tok2vec_small/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42d8414521eaf75f817bd1b351b26039a22a912bb2617f95ead305420f2ebffd
3
  size 6269370
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e84fc06eb319c94d28e460fc334e292120b01f18baa5dc8b50c977459820a090
3
  size 6269370
vocab/key2row CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31566ae010da3d399eb1d930ae142757afd2601034a4be3bdb00d18881c8c06a
3
- size 7066303
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8163b927a234a675074bb38ce62c17a57182998dc83fb9275d35500559a582a
3
+ size 9311659
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33316624b8e8ce9be5d4762929948a8c6bef9a9d6a138aae4110f40cd54858a3
3
- size 10662076
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed26b6bae75e5f9901b6d7c87c8293d75a292d81814631f55d0b046d9857888d
3
+ size 10042571
vocab/vectors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:234dcf234bfdf01775ae6182715d55eaacfcde8555b189f25440b56d3c39fd5d
3
- size 616988528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd82f972c4fca3d440c505cdd94c88efdded56457cc86851d584b751f7dea673
3
+ size 411501728