ArielUW committed on
Commit
e4a70b1
·
verified ·
1 Parent(s): 241f2ba

Update spaCy pipeline

Browse files
.gitattributes CHANGED
@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  ner/model filter=lfs diff=lfs merge=lfs -text
37
  pl_hacknationNER-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
38
  tok2vec/model filter=lfs diff=lfs merge=lfs -text
 
 
36
  ner/model filter=lfs diff=lfs merge=lfs -text
37
  pl_hacknationNER-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
38
  tok2vec/model filter=lfs diff=lfs merge=lfs -text
39
+ pl_hacknationner-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -6,7 +6,7 @@ language:
6
  - pl
7
  license: apache-2.0
8
  model-index:
9
- - name: pl_hacknationNER
10
  results:
11
  - task:
12
  name: NER
@@ -14,18 +14,20 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.722972973
18
  - name: NER Recall
19
  type: recall
20
- value: 0.6369047619
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.6772151899
24
  ---
 
 
25
  | Feature | Description |
26
  | --- | --- |
27
  | **Name** | `pl_hacknationNER` |
28
- | **Version** | `1.0` |
29
  | **spaCy** | `>=3.8.3,<3.9.0` |
30
  | **Default Pipeline** | `tok2vec`, `ner` |
31
  | **Components** | `tok2vec`, `ner` |
@@ -38,11 +40,11 @@ model-index:
38
 
39
  <details>
40
 
41
- <summary>View label scheme (17 labels for 1 components)</summary>
42
 
43
  | Component | Labels |
44
  | --- | --- |
45
- | **`ner`** | `ADDRESS`, `AGE`, `BANK-ACCOUNT`, `CITY`, `COMPANY`, `DATE`, `DOCUMENT-NUMBER`, `EMAIL`, `HEALTH`, `JOB-TITLE`, `NAME`, `PESEL`, `PHONE`, `RELATIVE`, `SEX`, `SEXUAL-ORIENTATION`, `SURNAME` |
46
 
47
  </details>
48
 
@@ -50,8 +52,8 @@ model-index:
50
 
51
  | Type | Score |
52
  | --- | --- |
53
- | `ENTS_F` | 67.72 |
54
- | `ENTS_P` | 72.30 |
55
- | `ENTS_R` | 63.69 |
56
- | `TOK2VEC_LOSS` | 4645.64 |
57
- | `NER_LOSS` | 1124.67 |
 
6
  - pl
7
  license: apache-2.0
8
  model-index:
9
+ - name: pl_hacknationner
10
  results:
11
  - task:
12
  name: NER
 
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.7677725118
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.6403162055
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.6982758621
24
  ---
25
+ second round of training
26
+
27
  | Feature | Description |
28
  | --- | --- |
29
  | **Name** | `pl_hacknationNER` |
30
+ | **Version** | `1.2` |
31
  | **spaCy** | `>=3.8.3,<3.9.0` |
32
  | **Default Pipeline** | `tok2vec`, `ner` |
33
  | **Components** | `tok2vec`, `ner` |
 
40
 
41
  <details>
42
 
43
+ <summary>View label scheme (23 labels for 1 component)</summary>
44
 
45
  | Component | Labels |
46
  | --- | --- |
47
+ | **`ner`** | `ADDRESS`, `AGE`, `BANK-ACCOUNT`, `CITY`, `COMPANY`, `DATE`, `DOCUMENT-NUMBER`, `EMAIL`, `ETHNICITY`, `HEALTH`, `JOB-TITLE`, `NAME`, `PESEL`, `PHONE`, `POLITICAL-VIEW`, `RELATIVE`, `RELIGION`, `SCHOOL-NAME`, `SECRET`, `SEX`, `SEXUAL-ORIENTATION`, `SURNAME`, `USERNAME` |
48
 
49
  </details>
50
 
 
52
 
53
  | Type | Score |
54
  | --- | --- |
55
+ | `ENTS_F` | 69.83 |
56
+ | `ENTS_P` | 76.78 |
57
+ | `ENTS_R` | 64.03 |
58
+ | `TOK2VEC_LOSS` | 58885.11 |
59
+ | `NER_LOSS` | 17819.95 |
__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from spacy.util import load_model_from_init_py, get_model_meta
3
+
4
+
5
+
6
+ __version__ = get_model_meta(Path(__file__).parent)['version']
7
+
8
+
9
+ def load(**overrides):
10
+ return load_model_from_init_py(__file__, **overrides)
meta.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "lang":"pl",
3
  "name":"hacknationNER",
4
- "version":"1.0",
5
- "description":"",
6
  "author":"Ariel",
7
  "email":"ariel.m.drozd@gmail.com",
8
  "url":"",
@@ -28,15 +28,21 @@
28
  "DATE",
29
  "DOCUMENT-NUMBER",
30
  "EMAIL",
 
31
  "HEALTH",
32
  "JOB-TITLE",
33
  "NAME",
34
  "PESEL",
35
  "PHONE",
 
36
  "RELATIVE",
 
 
 
37
  "SEX",
38
  "SEXUAL-ORIENTATION",
39
- "SURNAME"
 
40
  ]
41
  },
42
  "pipeline":[
@@ -51,98 +57,108 @@
51
 
52
  ],
53
  "performance":{
54
- "ents_f":0.6772151899,
55
- "ents_p":0.722972973,
56
- "ents_r":0.6369047619,
57
  "ents_per_type":{
58
- "NAME":{
59
- "p":0.6111111111,
60
- "r":0.44,
61
- "f":0.511627907
62
  },
63
- "SURNAME":{
64
- "p":0.5714285714,
65
- "r":0.7058823529,
66
- "f":0.6315789474
67
  },
68
- "USERNAME":{
69
- "p":0.0,
70
- "r":0.0,
71
- "f":0.0
72
  },
73
- "DATE":{
74
- "p":1.0,
75
- "r":0.6666666667,
76
- "f":0.8
77
  },
78
- "DOCUMENT-NUMBER":{
79
- "p":0.5833333333,
80
- "r":0.5,
81
- "f":0.5384615385
82
  },
83
- "CITY":{
84
- "p":0.619047619,
85
- "r":0.6842105263,
86
- "f":0.65
87
  },
88
- "JOB-TITLE":{
89
- "p":0.8,
90
- "r":0.4,
91
- "f":0.5333333333
92
  },
93
- "SEX":{
94
- "p":1.0,
95
- "r":1.0,
96
- "f":1.0
97
  },
98
  "AGE":{
99
- "p":0.7692307692,
100
- "r":1.0,
101
- "f":0.8695652174
102
  },
103
- "EMAIL":{
104
  "p":1.0,
105
- "r":1.0,
106
- "f":1.0
107
  },
108
- "PHONE":{
109
- "p":0.9285714286,
110
- "r":0.9285714286,
111
- "f":0.9285714286
112
  },
113
- "COMPANY":{
114
- "p":1.0,
115
- "r":0.3333333333,
 
 
 
 
 
116
  "f":0.5
117
  },
118
- "ADDRESS":{
119
- "p":0.7272727273,
120
- "r":0.6666666667,
121
- "f":0.6956521739
122
  },
123
- "BANK-ACCOUNT":{
124
  "p":0.0,
125
  "r":0.0,
126
  "f":0.0
127
  },
128
- "PESEL":{
129
- "p":1.0,
130
- "r":0.8,
131
- "f":0.8888888889
132
  },
133
- "HEALTH":{
134
  "p":0.0,
135
  "r":0.0,
136
  "f":0.0
137
  },
138
- "RELATIVE":{
139
  "p":1.0,
140
  "r":0.3333333333,
141
  "f":0.5
 
 
 
 
 
142
  }
143
  },
144
- "tok2vec_loss":46.4564050462,
145
- "ner_loss":11.2466955185
146
  },
147
  "requirements":[
148
  "spacy>=3.8.3,<3.9.0"
 
1
  {
2
  "lang":"pl",
3
  "name":"hacknationNER",
4
+ "version":"1.2",
5
+ "description":"second round of training",
6
  "author":"Ariel",
7
  "email":"ariel.m.drozd@gmail.com",
8
  "url":"",
 
28
  "DATE",
29
  "DOCUMENT-NUMBER",
30
  "EMAIL",
31
+ "ETHNICITY",
32
  "HEALTH",
33
  "JOB-TITLE",
34
  "NAME",
35
  "PESEL",
36
  "PHONE",
37
+ "POLITICAL-VIEW",
38
  "RELATIVE",
39
+ "RELIGION",
40
+ "SCHOOL-NAME",
41
+ "SECRET",
42
  "SEX",
43
  "SEXUAL-ORIENTATION",
44
+ "SURNAME",
45
+ "USERNAME"
46
  ]
47
  },
48
  "pipeline":[
 
57
 
58
  ],
59
  "performance":{
60
+ "ents_f":0.6982758621,
61
+ "ents_p":0.7677725118,
62
+ "ents_r":0.6403162055,
63
  "ents_per_type":{
64
+ "DATE":{
65
+ "p":0.875,
66
+ "r":0.7,
67
+ "f":0.7777777778
68
  },
69
+ "PESEL":{
70
+ "p":0.7777777778,
71
+ "r":0.6363636364,
72
+ "f":0.7
73
  },
74
+ "DOCUMENT-NUMBER":{
75
+ "p":0.8333333333,
76
+ "r":0.652173913,
77
+ "f":0.7317073171
78
  },
79
+ "JOB-TITLE":{
80
+ "p":0.625,
81
+ "r":0.2777777778,
82
+ "f":0.3846153846
83
  },
84
+ "EMAIL":{
85
+ "p":0.8333333333,
86
+ "r":0.9375,
87
+ "f":0.8823529412
88
  },
89
+ "PHONE":{
90
+ "p":0.6818181818,
91
+ "r":0.8823529412,
92
+ "f":0.7692307692
93
  },
94
+ "NAME":{
95
+ "p":0.7307692308,
96
+ "r":0.76,
97
+ "f":0.7450980392
98
  },
99
+ "CITY":{
100
+ "p":0.7142857143,
101
+ "r":0.6666666667,
102
+ "f":0.6896551724
103
  },
104
  "AGE":{
105
+ "p":0.8125,
106
+ "r":0.8666666667,
107
+ "f":0.8387096774
108
  },
109
+ "SEX":{
110
  "p":1.0,
111
+ "r":0.8181818182,
112
+ "f":0.9
113
  },
114
+ "ADDRESS":{
115
+ "p":0.7368421053,
116
+ "r":0.6086956522,
117
+ "f":0.6666666667
118
  },
119
+ "SURNAME":{
120
+ "p":0.8888888889,
121
+ "r":0.6956521739,
122
+ "f":0.7804878049
123
+ },
124
+ "RELATIVE":{
125
+ "p":0.8,
126
+ "r":0.3636363636,
127
  "f":0.5
128
  },
129
+ "COMPANY":{
130
+ "p":0.5,
131
+ "r":0.1428571429,
132
+ "f":0.2222222222
133
  },
134
+ "HEALTH":{
135
  "p":0.0,
136
  "r":0.0,
137
  "f":0.0
138
  },
139
+ "POLITICAL-VIEW":{
140
+ "p":0.5,
141
+ "r":0.25,
142
+ "f":0.3333333333
143
  },
144
+ "SCHOOL-NAME":{
145
  "p":0.0,
146
  "r":0.0,
147
  "f":0.0
148
  },
149
+ "ETHNICITY":{
150
  "p":1.0,
151
  "r":0.3333333333,
152
  "f":0.5
153
+ },
154
+ "BANK-ACCOUNT":{
155
+ "p":0.0,
156
+ "r":0.0,
157
+ "f":0.0
158
  }
159
  },
160
+ "tok2vec_loss":588.851107855,
161
+ "ner_loss":178.1994781494
162
  },
163
  "requirements":[
164
  "spacy>=3.8.3,<3.9.0"
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b30c1dc7f611a3d155e85e8b506eaa226f43ac02a6582ff3fba6933164f976f
3
- size 144149
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87cd3fafd03834601896c8606cf86bbf1dc8336ac35a9e175d69ff4633bb33e8
3
+ size 150389
ner/moves CHANGED
@@ -1 +1 @@
1
- ��moves��{"0":{},"1":{"ADDRESS":337,"PHONE":148,"DOCUMENT-NUMBER":105,"CITY":86,"NAME":71,"AGE":61,"SURNAME":60,"EMAIL":50,"JOB-TITLE":39,"SEX":35,"PESEL":34,"DATE":34,"RELATIVE":25,"COMPANY":23,"HEALTH":12,"SEXUAL-ORIENTATION":7,"BANK-ACCOUNT":5},"2":{"ADDRESS":337,"PHONE":148,"DOCUMENT-NUMBER":105,"CITY":86,"NAME":71,"AGE":61,"SURNAME":60,"EMAIL":50,"JOB-TITLE":39,"SEX":35,"PESEL":34,"DATE":34,"RELATIVE":25,"COMPANY":23,"HEALTH":12,"SEXUAL-ORIENTATION":7,"BANK-ACCOUNT":5},"3":{"ADDRESS":337,"PHONE":148,"DOCUMENT-NUMBER":105,"CITY":86,"NAME":71,"AGE":61,"SURNAME":60,"EMAIL":50,"JOB-TITLE":39,"SEX":35,"PESEL":34,"DATE":34,"RELATIVE":25,"COMPANY":23,"HEALTH":12,"SEXUAL-ORIENTATION":7,"BANK-ACCOUNT":5},"4":{"ADDRESS":337,"PHONE":148,"DOCUMENT-NUMBER":105,"CITY":86,"NAME":71,"AGE":61,"SURNAME":60,"EMAIL":50,"JOB-TITLE":39,"SEX":35,"PESEL":34,"DATE":34,"RELATIVE":25,"COMPANY":23,"HEALTH":12,"SEXUAL-ORIENTATION":7,"BANK-ACCOUNT":5,"":1},"5":{"":1}}�cfg��neg_key�
 
1
+ ��moves�0{"0":{},"1":{"ADDRESS":625,"PHONE":317,"DOCUMENT-NUMBER":173,"CITY":153,"NAME":146,"COMPANY":135,"SURNAME":106,"EMAIL":103,"AGE":90,"JOB-TITLE":72,"PESEL":65,"DATE":59,"SCHOOL-NAME":58,"SEX":49,"HEALTH":41,"RELATIVE":37,"USERNAME":21,"POLITICAL-VIEW":13,"SEXUAL-ORIENTATION":11,"ETHNICITY":7,"RELIGION":5,"BANK-ACCOUNT":5,"SECRET":2},"2":{"ADDRESS":625,"PHONE":317,"DOCUMENT-NUMBER":173,"CITY":153,"NAME":146,"COMPANY":135,"SURNAME":106,"EMAIL":103,"AGE":90,"JOB-TITLE":72,"PESEL":65,"DATE":59,"SCHOOL-NAME":58,"SEX":49,"HEALTH":41,"RELATIVE":37,"USERNAME":21,"POLITICAL-VIEW":13,"SEXUAL-ORIENTATION":11,"ETHNICITY":7,"RELIGION":5,"BANK-ACCOUNT":5,"SECRET":2},"3":{"ADDRESS":625,"PHONE":317,"DOCUMENT-NUMBER":173,"CITY":153,"NAME":146,"COMPANY":135,"SURNAME":106,"EMAIL":103,"AGE":90,"JOB-TITLE":72,"PESEL":65,"DATE":59,"SCHOOL-NAME":58,"SEX":49,"HEALTH":41,"RELATIVE":37,"USERNAME":21,"POLITICAL-VIEW":13,"SEXUAL-ORIENTATION":11,"ETHNICITY":7,"RELIGION":5,"BANK-ACCOUNT":5,"SECRET":2},"4":{"ADDRESS":625,"PHONE":317,"DOCUMENT-NUMBER":173,"CITY":153,"NAME":146,"COMPANY":135,"SURNAME":106,"EMAIL":103,"AGE":90,"JOB-TITLE":72,"PESEL":65,"DATE":59,"SCHOOL-NAME":58,"SEX":49,"HEALTH":41,"RELATIVE":37,"USERNAME":21,"POLITICAL-VIEW":13,"SEXUAL-ORIENTATION":11,"ETHNICITY":7,"RELIGION":5,"BANK-ACCOUNT":5,"SECRET":2,"":1},"5":{"":1}}�cfg��neg_key�
pl_hacknationner-any-py3-none-any.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1f579d158191ffc82ea68254ba8984e392a09e04447220bd45f13769b4a7726
3
+ size 5725355
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c49bcbc208e314ffdf5b43aafd3531c6ac691b3f9deec70ea732415987fbe91
3
  size 6009091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c805a95cdf348d2cd142c09a0adbcbefaceb42e32161f8ed8e408372298a6ba3
3
  size 6009091
vocab/strings.json CHANGED
The diff for this file is too large to render. See raw diff