firqaaa committed on
Commit
4a1bc0d
·
verified ·
1 Parent(s): f5ed7b8

Update spaCy pipeline

Browse files
Files changed (9) hide show
  1. README.md +20 -67
  2. config.cfg +16 -76
  3. id_core_news_sm-any-py3-none-any.whl +2 -2
  4. meta.json +102 -461
  5. ner/cfg +13 -0
  6. ner/model +0 -0
  7. ner/moves +1 -0
  8. tok2vec/model +1 -1
  9. vocab/strings.json +0 -0
README.md CHANGED
@@ -8,62 +8,26 @@ model-index:
8
  - name: id_core_news_sm
9
  results:
10
  - task:
11
- name: TAG
12
  type: token-classification
13
  metrics:
14
- - name: TAG (XPOS) Accuracy
15
- type: accuracy
16
- value: 0.9051536414
17
- - task:
18
- name: POS
19
- type: token-classification
20
- metrics:
21
- - name: POS (UPOS) Accuracy
22
- type: accuracy
23
- value: 0.9125297415
24
- - task:
25
- name: MORPH
26
- type: token-classification
27
- metrics:
28
- - name: Morph (UFeats) Accuracy
29
- type: accuracy
30
- value: 0.9296115526
31
- - task:
32
- name: LEMMA
33
- type: token-classification
34
- metrics:
35
- - name: Lemma Accuracy
36
- type: accuracy
37
- value: 0.9369920335
38
- - task:
39
- name: UNLABELED_DEPENDENCIES
40
- type: token-classification
41
- metrics:
42
- - name: Unlabeled Attachment Score (UAS)
43
- type: f_score
44
- value: 0.7753785754
45
- - task:
46
- name: LABELED_DEPENDENCIES
47
- type: token-classification
48
- metrics:
49
- - name: Labeled Attachment Score (LAS)
50
- type: f_score
51
- value: 0.6871555348
52
- - task:
53
- name: SENTS
54
- type: token-classification
55
- metrics:
56
- - name: Sentences F-Score
57
  type: f_score
58
- value: 0.857881137
59
  ---
60
  | Feature | Description |
61
  | --- | --- |
62
  | **Name** | `id_core_news_sm` |
63
- | **Version** | `0.0.0` |
64
- | **spaCy** | `>=3.7.2,<3.8.0` |
65
- | **Default Pipeline** | `tok2vec`, `tagger`, `morphologizer`, `trainable_lemmatizer`, `parser` |
66
- | **Components** | `tok2vec`, `tagger`, `morphologizer`, `trainable_lemmatizer`, `parser` |
67
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
68
  | **Sources** | n/a |
69
  | **License** | n/a |
@@ -73,13 +37,11 @@ model-index:
73
 
74
  <details>
75
 
76
- <summary>View label scheme (166 labels for 3 components)</summary>
77
 
78
  | Component | Labels |
79
  | --- | --- |
80
- | **`tagger`** | `APP`, `ASP`, `ASP+PS3`, `ASS`, `B--`, `B--+PS3`, `CC-`, `CCONJ`, `CD-`, `CO-`, `D--`, `D--+PS3`, `F--`, `F--+PS2`, `G--`, `G--+PS3`, `H--`, `I--`, `M--`, `M--+PS3`, `NOUN`, `NPD`, `NSD`, `NSD+PS3`, `NSF`, `NSM`, `NUM`, `O--`, `PP1`, `PP2`, `PP3`, `PROPN`, `PS1`, `PS1+VSA`, `PS2`, `PS3`, `R--`, `R--+PS3`, `S--`, `SYM`, `T--`, `VERB`, `VPA`, `VSA`, `VSA+PS2`, `VSA+PS3`, `VSP`, `W--`, `X--`, `Z--` |
81
- | **`morphologizer`** | `POS=PROPN`, `POS=AUX`, `Definite=Ind\|POS=DET\|PronType=Art`, `Number=Sing\|POS=NOUN`, `POS=PRON\|PronType=Rel`, `Mood=Ind\|POS=VERB\|Voice=Pass`, `POS=ADP`, `POS=PUNCT`, `POS=NOUN`, `POS=ADV`, `POS=CCONJ`, `POS=SCONJ`, `Mood=Ind\|POS=VERB\|Voice=Act`, `POS=VERB`, `POS=DET\|PronType=Tot`, `Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Number=Plur\|POS=PRON\|Person=3\|PronType=Prs`, `POS=PRON\|PronType=Prs\|Reflex=Yes`, `POS=DET\|PronType=Dem`, `NumType=Card\|POS=NUM`, `POS=ADJ`, `Number=Plur\|POS=DET\|PronType=Ind`, `NumType=Card\|POS=NUM\|PronType=Tot`, `POS=PART\|Polarity=Neg`, `POS=PRON\|PronType=Int`, `NumType=Ord\|POS=ADJ`, `POS=PART`, `POS=PRON\|PronType=Dem`, `POS=DET\|PronType=Ind`, `Number=Plur\|POS=NOUN`, `Number=Sing\|POS=PRON\|Person=1\|Polite=Form\|PronType=Prs`, `POS=ADV\|PronType=Int`, `Clusivity=In\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Definite=Def\|POS=DET\|PronType=Art`, `POS=SYM`, `Degree=Sup\|POS=ADJ`, `POS=INTJ`, `Number=Sing\|POS=PRON\|Person=2\|Polite=Infm\|PronType=Prs`, `POS=ADV\|PronType=Ind`, `Number=Sing\|POS=PRON\|Person=3\|Polite=Form\|PronType=Prs`, `Number=Sing\|POS=PRON\|Person=1\|Polite=Infm\|PronType=Prs`, `Number=Sing\|POS=PRON\|PronType=Ind`, `POS=VERB\|Voice=Act`, `POS=DET\|PronType=Emp`, `POS=VERB\|Voice=Pass`, `POS=ADV\|PronType=Dem`, `POS=NOUN\|Typo=Yes`, `POS=ADP\|Typo=Yes`, `Number=Plur\|POS=PRON\|PronType=Ind`, `POS=VERB\|Typo=Yes\|Voice=Pass`, `POS=X`, `POS=PRON\|PronType=Tot`, `POS=SCONJ\|Typo=Yes`, `Number=Plur\|POS=PRON\|Person=2\|Polite=Infm\|PronType=Prs`, `NumType=Card\|POS=NUM\|Typo=Yes`, `Clusivity=Ex\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Number=Sing\|POS=PRON\|Person=2\|Polite=Form\|PronType=Prs`, `Foreign=Yes\|POS=X`, `POS=ADV\|PronType=Rel`, `Mood=Imp\|POS=VERB\|Voice=Act`, `Number=Sing\|POS=NOUN\|Typo=Yes`, `POS=PROPN\|Typo=Yes`, `POS=DET`, `Number=Sing\|POS=DET\|PronType=Ind`, `POS=DET\|PronType=Ind\|Typo=Yes`, `Abbr=Yes\|POS=DET\|PronType=Dem`, 
`POS=PRON\|PronType=Ind`, `POS=VERB\|Typo=Yes`, `Abbr=Yes\|POS=PROPN`, `Abbr=Yes\|POS=PRON\|PronType=Rel`, `Number=Plur\|POS=PRON\|PronType=Int`, `Abbr=Yes\|POS=PART\|Polarity=Neg`, `POS=ADV\|PronType=Tot`, `Abbr=Yes\|POS=ADV`, `POS=ADV\|Typo=Yes`, `POS=X\|Typo=Yes`, `Number=Sing\|POS=PRON\|Person=2\|PronType=Prs`, `POS=ADV\|PronType=Int\|Typo=Yes`, `NumType=Ord\|POS=ADJ\|Typo=Yes` |
82
- | **`parser`** | `ROOT`, `acl`, `acl:relcl`, `advcl`, `advmod`, `advmod:emph`, `amod`, `appos`, `aux`, `case`, `case:adv`, `cc`, `ccomp`, `compound`, `conj`, `cop`, `dep`, `det`, `fixed`, `flat`, `flat:foreign`, `flat:name`, `mark`, `nmod`, `nmod:lmod`, `nmod:poss`, `nmod:tmod`, `nsubj`, `nsubj:pass`, `nummod`, `obj`, `obl`, `obl:agent`, `obl:tmod`, `parataxis`, `punct`, `xcomp` |
83
 
84
  </details>
85
 
@@ -87,17 +49,8 @@ model-index:
87
 
88
  | Type | Score |
89
  | --- | --- |
90
- | `TAG_ACC` | 90.52 |
91
- | `POS_ACC` | 91.25 |
92
- | `MORPH_ACC` | 92.96 |
93
- | `LEMMA_ACC` | 93.70 |
94
- | `DEP_UAS` | 77.54 |
95
- | `DEP_LAS` | 68.72 |
96
- | `SENTS_P` | 82.72 |
97
- | `SENTS_R` | 89.09 |
98
- | `SENTS_F` | 85.79 |
99
- | `TOK2VEC_LOSS` | 756743.38 |
100
- | `TAGGER_LOSS` | 73614.38 |
101
- | `MORPHOLOGIZER_LOSS` | 155689.33 |
102
- | `TRAINABLE_LEMMATIZER_LOSS` | 35033.93 |
103
- | `PARSER_LOSS` | 1037857.66 |
 
8
  - name: id_core_news_sm
9
  results:
10
  - task:
11
+ name: NER
12
  type: token-classification
13
  metrics:
14
+ - name: NER Precision
15
+ type: precision
16
+ value: 0.6721056721
17
+ - name: NER Recall
18
+ type: recall
19
+ value: 0.6040502793
20
+ - name: NER F Score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  type: f_score
22
+ value: 0.6362633321
23
  ---
24
  | Feature | Description |
25
  | --- | --- |
26
  | **Name** | `id_core_news_sm` |
27
+ | **Version** | `0.0.1` |
28
+ | **spaCy** | `>=3.7.4,<3.8.0` |
29
+ | **Default Pipeline** | `tok2vec`, `ner` |
30
+ | **Components** | `tok2vec`, `ner` |
31
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
32
  | **Sources** | n/a |
33
  | **License** | n/a |
 
37
 
38
  <details>
39
 
40
+ <summary>View label scheme (18 labels for 1 components)</summary>
41
 
42
  | Component | Labels |
43
  | --- | --- |
44
+ | **`ner`** | `CARDINAL`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `TIME`, `WORK_OF_ART` |
 
 
45
 
46
  </details>
47
 
 
49
 
50
  | Type | Score |
51
  | --- | --- |
52
+ | `ENTS_F` | 63.63 |
53
+ | `ENTS_P` | 67.21 |
54
+ | `ENTS_R` | 60.41 |
55
+ | `TOK2VEC_LOSS` | 45767.58 |
56
+ | `NER_LOSS` | 127721.43 |
 
 
 
 
 
 
 
 
 
config.cfg CHANGED
@@ -1,6 +1,6 @@
1
  [paths]
2
- train = "./id_gsd-ud-train.spacy"
3
- dev = "./id_gsd-ud-dev.spacy"
4
  vectors = null
5
  init_tok2vec = null
6
 
@@ -10,7 +10,7 @@ seed = 0
10
 
11
  [nlp]
12
  lang = "id"
13
- pipeline = ["tok2vec","tagger","morphologizer","trainable_lemmatizer","parser"]
14
  batch_size = 1000
15
  disabled = []
16
  before_creation = null
@@ -21,58 +21,23 @@ vectors = {"@vectors":"spacy.Vectors.v1"}
21
 
22
  [components]
23
 
24
- [components.morphologizer]
25
- factory = "morphologizer"
26
- extend = false
27
- label_smoothing = 0.05
28
- overwrite = true
29
- scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
30
-
31
- [components.morphologizer.model]
32
- @architectures = "spacy.Tagger.v2"
33
- nO = null
34
- normalize = false
35
-
36
- [components.morphologizer.model.tok2vec]
37
- @architectures = "spacy.Tok2VecListener.v1"
38
- width = ${components.tok2vec.model.encode.width}
39
- upstream = "*"
40
-
41
- [components.parser]
42
- factory = "parser"
43
- learn_tokens = false
44
- min_action_freq = 30
45
  moves = null
46
- scorer = {"@scorers":"spacy.parser_scorer.v1"}
47
  update_with_oracle_cut_size = 100
48
 
49
- [components.parser.model]
50
  @architectures = "spacy.TransitionBasedParser.v2"
51
- state_type = "parser"
52
  extra_state_tokens = false
53
- hidden_width = 128
54
- maxout_pieces = 3
55
  use_upper = true
56
  nO = null
57
 
58
- [components.parser.model.tok2vec]
59
- @architectures = "spacy.Tok2VecListener.v1"
60
- width = ${components.tok2vec.model.encode.width}
61
- upstream = "*"
62
-
63
- [components.tagger]
64
- factory = "tagger"
65
- label_smoothing = 0.05
66
- neg_prefix = "!"
67
- overwrite = false
68
- scorer = {"@scorers":"spacy.tagger_scorer.v1"}
69
-
70
- [components.tagger.model]
71
- @architectures = "spacy.Tagger.v2"
72
- nO = null
73
- normalize = false
74
-
75
- [components.tagger.model.tok2vec]
76
  @architectures = "spacy.Tok2VecListener.v1"
77
  width = ${components.tok2vec.model.encode.width}
78
  upstream = "*"
@@ -97,24 +62,6 @@ depth = 8
97
  window_size = 1
98
  maxout_pieces = 3
99
 
100
- [components.trainable_lemmatizer]
101
- factory = "trainable_lemmatizer"
102
- backoff = "orth"
103
- min_tree_freq = 3
104
- overwrite = false
105
- scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
106
- top_k = 1
107
-
108
- [components.trainable_lemmatizer.model]
109
- @architectures = "spacy.Tagger.v2"
110
- nO = null
111
- normalize = false
112
-
113
- [components.trainable_lemmatizer.model.tok2vec]
114
- @architectures = "spacy.Tok2VecListener.v1"
115
- width = ${components.tok2vec.model.encode.width}
116
- upstream = "*"
117
-
118
  [corpora]
119
 
120
  [corpora.dev]
@@ -178,17 +125,10 @@ eps = 0.00000001
178
  learn_rate = 0.001
179
 
180
  [training.score_weights]
181
- tag_acc = 0.26
182
- pos_acc = 0.12
183
- morph_acc = 0.12
184
- morph_per_feat = null
185
- lemma_acc = 0.26
186
- dep_uas = 0.12
187
- dep_las = 0.12
188
- dep_las_per_type = null
189
- sents_p = null
190
- sents_r = null
191
- sents_f = 0.0
192
 
193
  [pretraining]
194
 
 
1
  [paths]
2
+ train = "./ner_id_train.spacy"
3
+ dev = "./ner_id_dev.spacy"
4
  vectors = null
5
  init_tok2vec = null
6
 
 
10
 
11
  [nlp]
12
  lang = "id"
13
+ pipeline = ["tok2vec","ner"]
14
  batch_size = 1000
15
  disabled = []
16
  before_creation = null
 
21
 
22
  [components]
23
 
24
+ [components.ner]
25
+ factory = "ner"
26
+ incorrect_spans_key = null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  moves = null
28
+ scorer = {"@scorers":"spacy.ner_scorer.v1"}
29
  update_with_oracle_cut_size = 100
30
 
31
+ [components.ner.model]
32
  @architectures = "spacy.TransitionBasedParser.v2"
33
+ state_type = "ner"
34
  extra_state_tokens = false
35
+ hidden_width = 64
36
+ maxout_pieces = 2
37
  use_upper = true
38
  nO = null
39
 
40
+ [components.ner.model.tok2vec]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  @architectures = "spacy.Tok2VecListener.v1"
42
  width = ${components.tok2vec.model.encode.width}
43
  upstream = "*"
 
62
  window_size = 1
63
  maxout_pieces = 3
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  [corpora]
66
 
67
  [corpora.dev]
 
125
  learn_rate = 0.001
126
 
127
  [training.score_weights]
128
+ ents_f = 1.0
129
+ ents_p = 0.0
130
+ ents_r = 0.0
131
+ ents_per_type = null
 
 
 
 
 
 
 
132
 
133
  [pretraining]
134
 
id_core_news_sm-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8a2b7591fd92c9bc67958b9efb5a7f4b2be00928f42c768933ffc12298300a4
3
- size 34069443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f5b707caf40cf9fdea315f2e5d792baa07e4a95b1ca2d7b66447a5dffb556b9
3
+ size 32101183
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"id",
3
  "name":"core_news_sm",
4
- "version":"0.0.0",
5
  "description":"",
6
  "author":"",
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
- "spacy_version":">=3.7.2,<3.8.0",
11
- "spacy_git_version":"a89eae928",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
@@ -19,495 +19,136 @@
19
  "tok2vec":[
20
 
21
  ],
22
- "tagger":[
23
- "APP",
24
- "ASP",
25
- "ASP+PS3",
26
- "ASS",
27
- "B--",
28
- "B--+PS3",
29
- "CC-",
30
- "CCONJ",
31
- "CD-",
32
- "CO-",
33
- "D--",
34
- "D--+PS3",
35
- "F--",
36
- "F--+PS2",
37
- "G--",
38
- "G--+PS3",
39
- "H--",
40
- "I--",
41
- "M--",
42
- "M--+PS3",
43
- "NOUN",
44
- "NPD",
45
- "NSD",
46
- "NSD+PS3",
47
- "NSF",
48
- "NSM",
49
- "NUM",
50
- "O--",
51
- "PP1",
52
- "PP2",
53
- "PP3",
54
- "PROPN",
55
- "PS1",
56
- "PS1+VSA",
57
- "PS2",
58
- "PS3",
59
- "R--",
60
- "R--+PS3",
61
- "S--",
62
- "SYM",
63
- "T--",
64
- "VERB",
65
- "VPA",
66
- "VSA",
67
- "VSA+PS2",
68
- "VSA+PS3",
69
- "VSP",
70
- "W--",
71
- "X--",
72
- "Z--"
73
- ],
74
- "morphologizer":[
75
- "POS=PROPN",
76
- "POS=AUX",
77
- "Definite=Ind|POS=DET|PronType=Art",
78
- "Number=Sing|POS=NOUN",
79
- "POS=PRON|PronType=Rel",
80
- "Mood=Ind|POS=VERB|Voice=Pass",
81
- "POS=ADP",
82
- "POS=PUNCT",
83
- "POS=NOUN",
84
- "POS=ADV",
85
- "POS=CCONJ",
86
- "POS=SCONJ",
87
- "Mood=Ind|POS=VERB|Voice=Act",
88
- "POS=VERB",
89
- "POS=DET|PronType=Tot",
90
- "Number=Sing|POS=PRON|Person=3|PronType=Prs",
91
- "Number=Plur|POS=PRON|Person=3|PronType=Prs",
92
- "POS=PRON|PronType=Prs|Reflex=Yes",
93
- "POS=DET|PronType=Dem",
94
- "NumType=Card|POS=NUM",
95
- "POS=ADJ",
96
- "Number=Plur|POS=DET|PronType=Ind",
97
- "NumType=Card|POS=NUM|PronType=Tot",
98
- "POS=PART|Polarity=Neg",
99
- "POS=PRON|PronType=Int",
100
- "NumType=Ord|POS=ADJ",
101
- "POS=PART",
102
- "POS=PRON|PronType=Dem",
103
- "POS=DET|PronType=Ind",
104
- "Number=Plur|POS=NOUN",
105
- "Number=Sing|POS=PRON|Person=1|Polite=Form|PronType=Prs",
106
- "POS=ADV|PronType=Int",
107
- "Clusivity=In|Number=Plur|POS=PRON|Person=1|PronType=Prs",
108
- "Definite=Def|POS=DET|PronType=Art",
109
- "POS=SYM",
110
- "Degree=Sup|POS=ADJ",
111
- "POS=INTJ",
112
- "Number=Sing|POS=PRON|Person=2|Polite=Infm|PronType=Prs",
113
- "POS=ADV|PronType=Ind",
114
- "Number=Sing|POS=PRON|Person=3|Polite=Form|PronType=Prs",
115
- "Number=Sing|POS=PRON|Person=1|Polite=Infm|PronType=Prs",
116
- "Number=Sing|POS=PRON|PronType=Ind",
117
- "POS=VERB|Voice=Act",
118
- "POS=DET|PronType=Emp",
119
- "POS=VERB|Voice=Pass",
120
- "POS=ADV|PronType=Dem",
121
- "POS=NOUN|Typo=Yes",
122
- "POS=ADP|Typo=Yes",
123
- "Number=Plur|POS=PRON|PronType=Ind",
124
- "POS=VERB|Typo=Yes|Voice=Pass",
125
- "POS=X",
126
- "POS=PRON|PronType=Tot",
127
- "POS=SCONJ|Typo=Yes",
128
- "Number=Plur|POS=PRON|Person=2|Polite=Infm|PronType=Prs",
129
- "NumType=Card|POS=NUM|Typo=Yes",
130
- "Clusivity=Ex|Number=Plur|POS=PRON|Person=1|PronType=Prs",
131
- "Number=Sing|POS=PRON|Person=2|Polite=Form|PronType=Prs",
132
- "Foreign=Yes|POS=X",
133
- "POS=ADV|PronType=Rel",
134
- "Mood=Imp|POS=VERB|Voice=Act",
135
- "Number=Sing|POS=NOUN|Typo=Yes",
136
- "POS=PROPN|Typo=Yes",
137
- "POS=DET",
138
- "Number=Sing|POS=DET|PronType=Ind",
139
- "POS=DET|PronType=Ind|Typo=Yes",
140
- "Abbr=Yes|POS=DET|PronType=Dem",
141
- "POS=PRON|PronType=Ind",
142
- "POS=VERB|Typo=Yes",
143
- "Abbr=Yes|POS=PROPN",
144
- "Abbr=Yes|POS=PRON|PronType=Rel",
145
- "Number=Plur|POS=PRON|PronType=Int",
146
- "Abbr=Yes|POS=PART|Polarity=Neg",
147
- "POS=ADV|PronType=Tot",
148
- "Abbr=Yes|POS=ADV",
149
- "POS=ADV|Typo=Yes",
150
- "POS=X|Typo=Yes",
151
- "Number=Sing|POS=PRON|Person=2|PronType=Prs",
152
- "POS=ADV|PronType=Int|Typo=Yes",
153
- "NumType=Ord|POS=ADJ|Typo=Yes"
154
- ],
155
- "parser":[
156
- "ROOT",
157
- "acl",
158
- "acl:relcl",
159
- "advcl",
160
- "advmod",
161
- "advmod:emph",
162
- "amod",
163
- "appos",
164
- "aux",
165
- "case",
166
- "case:adv",
167
- "cc",
168
- "ccomp",
169
- "compound",
170
- "conj",
171
- "cop",
172
- "dep",
173
- "det",
174
- "fixed",
175
- "flat",
176
- "flat:foreign",
177
- "flat:name",
178
- "mark",
179
- "nmod",
180
- "nmod:lmod",
181
- "nmod:poss",
182
- "nmod:tmod",
183
- "nsubj",
184
- "nsubj:pass",
185
- "nummod",
186
- "obj",
187
- "obl",
188
- "obl:agent",
189
- "obl:tmod",
190
- "parataxis",
191
- "punct",
192
- "xcomp"
193
  ]
194
  },
195
  "pipeline":[
196
  "tok2vec",
197
- "tagger",
198
- "morphologizer",
199
- "trainable_lemmatizer",
200
- "parser"
201
  ],
202
  "components":[
203
  "tok2vec",
204
- "tagger",
205
- "morphologizer",
206
- "trainable_lemmatizer",
207
- "parser"
208
  ],
209
  "disabled":[
210
 
211
  ],
212
  "performance":{
213
- "tag_acc":0.9051536414,
214
- "pos_acc":0.9125297415,
215
- "morph_acc":0.9296115526,
216
- "morph_per_feat":{
217
- "Number":{
218
- "p":0.9799159271,
219
- "r":0.8237141735,
220
- "f":0.8950511945
221
- },
222
- "Mood":{
223
- "p":0.997098646,
224
- "r":0.9246636771,
225
- "f":0.959516054
226
- },
227
- "Voice":{
228
- "p":0.9942084942,
229
- "r":0.9221128021,
230
- "f":0.9568044589
231
- },
232
- "PronType":{
233
- "p":0.9915662651,
234
- "r":0.7786187323,
235
- "f":0.8722840488
236
- },
237
- "Polarity":{
238
- "p":1.0,
239
- "r":0.862745098,
240
- "f":0.9263157895
241
- },
242
- "Person":{
243
- "p":1.0,
244
- "r":0.4039735099,
245
- "f":0.5754716981
246
- },
247
- "NumType":{
248
- "p":0.9952606635,
249
- "r":0.9480812641,
250
- "f":0.9710982659
251
- },
252
- "Typo":{
253
- "p":1.0,
254
- "r":0.4,
255
- "f":0.5714285714
256
- },
257
- "Definite":{
258
- "p":0.9838709677,
259
- "r":0.7922077922,
260
- "f":0.8776978417
261
- },
262
- "Polite":{
263
- "p":1.0,
264
- "r":0.71875,
265
- "f":0.8363636364
266
- },
267
- "Reflex":{
268
- "p":1.0,
269
- "r":0.5,
270
- "f":0.6666666667
271
- },
272
- "Degree":{
273
- "p":0.9375,
274
- "r":0.8823529412,
275
- "f":0.9090909091
276
- },
277
- "Foreign":{
278
- "p":1.0,
279
- "r":0.0625,
280
  "f":0.1176470588
281
  },
282
- "Clusivity":{
283
- "p":1.0,
284
- "r":1.0,
285
- "f":1.0
286
- },
287
- "Abbr":{
288
- "p":1.0,
289
- "r":0.2,
290
- "f":0.3333333333
291
- }
292
- },
293
- "lemma_acc":0.9369920335,
294
- "dep_uas":0.7753785754,
295
- "dep_las":0.6871555348,
296
- "dep_las_per_type":{
297
- "nsubj":{
298
- "p":0.7731092437,
299
- "r":0.7459459459,
300
- "f":0.7592847318
301
- },
302
- "compound":{
303
- "p":0.6962676963,
304
- "r":0.6874205845,
305
- "f":0.6918158568
306
- },
307
- "root":{
308
- "p":0.7585616438,
309
- "r":0.7924865832,
310
- "f":0.7751531059
311
- },
312
- "obj":{
313
- "p":0.7978142077,
314
- "r":0.7630662021,
315
- "f":0.7800534283
316
- },
317
- "case":{
318
- "p":0.9049295775,
319
- "r":0.8877374784,
320
- "f":0.8962510898
321
- },
322
- "obl":{
323
- "p":0.6753246753,
324
- "r":0.6409861325,
325
- "f":0.6577075099
326
- },
327
- "amod":{
328
- "p":0.6421568627,
329
- "r":0.568329718,
330
- "f":0.6029919448
331
- },
332
- "conj":{
333
- "p":0.5394736842,
334
- "r":0.5354477612,
335
- "f":0.5374531835
336
- },
337
- "cc":{
338
- "p":0.8705882353,
339
- "r":0.8433048433,
340
- "f":0.8567293777
341
- },
342
- "acl:relcl":{
343
- "p":0.7086614173,
344
- "r":0.6428571429,
345
- "f":0.6741573034
346
- },
347
- "flat:name":{
348
- "p":0.7908571429,
349
- "r":0.8218527316,
350
- "f":0.8060570763
351
- },
352
- "advmod":{
353
- "p":0.7546174142,
354
- "r":0.7132169576,
355
- "f":0.7333333333
356
- },
357
- "nmod":{
358
- "p":0.6468085106,
359
- "r":0.5435041716,
360
- "f":0.5906735751
361
- },
362
- "nsubj:pass":{
363
- "p":0.698630137,
364
- "r":0.7083333333,
365
- "f":0.7034482759
366
- },
367
- "det":{
368
- "p":0.8745980707,
369
- "r":0.7661971831,
370
- "f":0.8168168168
371
- },
372
- "aux":{
373
- "p":0.9064748201,
374
- "r":0.9402985075,
375
- "f":0.9230769231
376
- },
377
- "nmod:poss":{
378
- "p":0.5714285714,
379
- "r":0.0727272727,
380
- "f":0.1290322581
381
- },
382
- "dep":{
383
- "p":0.0967741935,
384
- "r":0.0535714286,
385
- "f":0.0689655172
386
  },
387
- "mark":{
388
- "p":0.8391959799,
389
- "r":0.7076271186,
390
- "f":0.767816092
391
  },
392
- "cop":{
393
- "p":0.9607843137,
394
- "r":0.9423076923,
395
- "f":0.9514563107
396
- },
397
- "acl":{
398
- "p":0.2297297297,
399
- "r":0.3035714286,
400
- "f":0.2615384615
401
- },
402
- "nummod":{
403
- "p":0.7834224599,
404
- "r":0.7855227882,
405
- "f":0.7844712182
406
- },
407
- "appos":{
408
- "p":0.5615384615,
409
- "r":0.553030303,
410
- "f":0.5572519084
411
- },
412
- "xcomp":{
413
- "p":0.3630573248,
414
- "r":0.456,
415
- "f":0.4042553191
416
- },
417
- "ccomp":{
418
- "p":0.4,
419
- "r":0.0408163265,
420
- "f":0.0740740741
421
- },
422
- "obl:tmod":{
423
- "p":0.6551724138,
424
- "r":0.6229508197,
425
- "f":0.6386554622
426
- },
427
- "advcl":{
428
- "p":0.2831858407,
429
- "r":0.2269503546,
430
- "f":0.2519685039
431
- },
432
- "advmod:emph":{
433
- "p":0.6666666667,
434
- "r":0.0434782609,
435
- "f":0.0816326531
436
- },
437
- "case:adv":{
438
  "p":0.6666666667,
439
- "r":0.4615384615,
440
- "f":0.5454545455
441
- },
442
- "obl:agent":{
443
- "p":0.0,
444
- "r":0.0,
445
- "f":0.0
446
  },
447
- "flat":{
448
- "p":0.5185185185,
449
- "r":0.3255813953,
450
- "f":0.4
451
- },
452
- "parataxis":{
453
- "p":0.25,
454
- "r":0.1020408163,
455
- "f":0.1449275362
456
- },
457
- "nmod:lmod":{
458
- "p":0.8333333333,
459
- "r":0.3846153846,
460
- "f":0.5263157895
461
- },
462
- "flat:foreign":{
463
- "p":0.0,
464
- "r":0.0,
465
- "f":0.0
466
  },
467
- "nmod:tmod":{
468
- "p":0.4,
469
- "r":0.3636363636,
470
- "f":0.380952381
471
  },
472
- "iobj":{
473
- "p":0.0,
474
- "r":0.0,
475
- "f":0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
476
  },
477
- "csubj":{
478
- "p":0.0,
479
- "r":0.0,
480
- "f":0.0
481
  },
482
- "fixed":{
483
  "p":0.5555555556,
484
- "r":0.3846153846,
485
- "f":0.4545454545
486
  },
487
- "discourse":{
488
- "p":0.0,
489
- "r":0.0,
490
- "f":0.0
491
  },
492
- "cc:preconj":{
493
  "p":0.0,
494
  "r":0.0,
495
  "f":0.0
496
  },
497
- "compound:a":{
498
- "p":0.0,
499
- "r":0.0,
500
- "f":0.0
501
  }
502
  },
503
- "sents_p":0.8272425249,
504
- "sents_r":0.8908765653,
505
- "sents_f":0.857881137,
506
- "tok2vec_loss":7567.4337583379,
507
- "tagger_loss":736.1438090745,
508
- "morphologizer_loss":1556.8932885677,
509
- "trainable_lemmatizer_loss":350.339323577,
510
- "parser_loss":10378.5765946195
511
  },
512
  "requirements":[
513
 
 
1
  {
2
  "lang":"id",
3
  "name":"core_news_sm",
4
+ "version":"0.0.1",
5
  "description":"",
6
  "author":"",
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
+ "spacy_version":">=3.7.4,<3.8.0",
11
+ "spacy_git_version":"bff8725f4",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
 
19
  "tok2vec":[
20
 
21
  ],
22
+ "ner":[
23
+ "CARDINAL",
24
+ "DATE",
25
+ "EVENT",
26
+ "FAC",
27
+ "GPE",
28
+ "LANGUAGE",
29
+ "LAW",
30
+ "LOC",
31
+ "MONEY",
32
+ "NORP",
33
+ "ORDINAL",
34
+ "ORG",
35
+ "PERCENT",
36
+ "PERSON",
37
+ "PRODUCT",
38
+ "QUANTITY",
39
+ "TIME",
40
+ "WORK_OF_ART"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  ]
42
  },
43
  "pipeline":[
44
  "tok2vec",
45
+ "ner"
 
 
 
46
  ],
47
  "components":[
48
  "tok2vec",
49
+ "ner"
 
 
 
50
  ],
51
  "disabled":[
52
 
53
  ],
54
  "performance":{
55
+ "ents_f":0.6362633321,
56
+ "ents_p":0.6721056721,
57
+ "ents_r":0.6040502793,
58
+ "ents_per_type":{
59
+ "PRODUCT":{
60
+ "p":0.3571428571,
61
+ "r":0.0704225352,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  "f":0.1176470588
63
  },
64
+ "GPE":{
65
+ "p":0.7411167513,
66
+ "r":0.7934782609,
67
+ "f":0.7664041995
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  },
69
+ "LOC":{
70
+ "p":0.7142857143,
71
+ "r":0.4651162791,
72
+ "f":0.5633802817
73
  },
74
+ "FAC":{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  "p":0.6666666667,
76
+ "r":0.1666666667,
77
+ "f":0.2666666667
 
 
 
 
 
78
  },
79
+ "ORG":{
80
+ "p":0.4651162791,
81
+ "r":0.3174603175,
82
+ "f":0.3773584906
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  },
84
+ "PERSON":{
85
+ "p":0.6809815951,
86
+ "r":0.6434782609,
87
+ "f":0.6616989568
88
  },
89
+ "WORK_OF_ART":{
90
+ "p":0.1785714286,
91
+ "r":0.4,
92
+ "f":0.2469135802
93
+ },
94
+ "DATE":{
95
+ "p":0.75,
96
+ "r":0.7804878049,
97
+ "f":0.764940239
98
+ },
99
+ "ORDINAL":{
100
+ "p":0.6956521739,
101
+ "r":0.5925925926,
102
+ "f":0.64
103
+ },
104
+ "CARDINAL":{
105
+ "p":0.7155963303,
106
+ "r":0.75,
107
+ "f":0.7323943662
108
+ },
109
+ "NORP":{
110
+ "p":0.7894736842,
111
+ "r":0.4918032787,
112
+ "f":0.6060606061
113
+ },
114
+ "LAW":{
115
+ "p":0.8,
116
+ "r":0.3333333333,
117
+ "f":0.4705882353
118
+ },
119
+ "QUANTITY":{
120
+ "p":0.6666666667,
121
+ "r":0.5925925926,
122
+ "f":0.6274509804
123
  },
124
+ "EVENT":{
125
+ "p":0.6875,
126
+ "r":0.2619047619,
127
+ "f":0.3793103448
128
  },
129
+ "PERCENT":{
130
  "p":0.5555555556,
131
+ "r":0.7142857143,
132
+ "f":0.625
133
  },
134
+ "LANGUAGE":{
135
+ "p":0.8,
136
+ "r":0.9230769231,
137
+ "f":0.8571428571
138
  },
139
+ "MONEY":{
140
  "p":0.0,
141
  "r":0.0,
142
  "f":0.0
143
  },
144
+ "TIME":{
145
+ "p":0.5,
146
+ "r":1.0,
147
+ "f":0.6666666667
148
  }
149
  },
150
+ "tok2vec_loss":457.675804386,
151
+ "ner_loss":1277.2143377495
 
 
 
 
 
 
152
  },
153
  "requirements":[
154
 
ner/cfg ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "moves":null,
3
+ "update_with_oracle_cut_size":100,
4
+ "multitasks":[
5
+
6
+ ],
7
+ "min_action_freq":1,
8
+ "learn_tokens":false,
9
+ "beam_width":1,
10
+ "beam_density":0.0,
11
+ "beam_update_prob":0.0,
12
+ "incorrect_spans_key":null
13
+ }
ner/model ADDED
Binary file (186 kB). View file
 
ner/moves ADDED
@@ -0,0 +1 @@
 
 
1
+ ��moves��{"0":{},"1":{"GPE":3540,"PERSON":3236,"ORG":1843,"DATE":1451,"CARDINAL":743,"WORK_OF_ART":705,"PRODUCT":642,"EVENT":629,"LOC":491,"FAC":486,"QUANTITY":429,"NORP":427,"LANGUAGE":194,"ORDINAL":177,"LAW":137,"PERCENT":126,"TIME":103,"MONEY":65},"2":{"GPE":3540,"PERSON":3236,"ORG":1843,"DATE":1451,"CARDINAL":743,"WORK_OF_ART":705,"PRODUCT":642,"EVENT":629,"LOC":491,"FAC":486,"QUANTITY":429,"NORP":427,"LANGUAGE":194,"ORDINAL":177,"LAW":137,"PERCENT":126,"TIME":103,"MONEY":65},"3":{"GPE":3540,"PERSON":3236,"ORG":1843,"DATE":1451,"CARDINAL":743,"WORK_OF_ART":705,"PRODUCT":642,"EVENT":629,"LOC":491,"FAC":486,"QUANTITY":429,"NORP":427,"LANGUAGE":194,"ORDINAL":177,"LAW":137,"PERCENT":126,"TIME":103,"MONEY":65},"4":{"GPE":3540,"PERSON":3236,"ORG":1843,"DATE":1451,"CARDINAL":743,"WORK_OF_ART":705,"PRODUCT":642,"EVENT":629,"LOC":491,"FAC":486,"QUANTITY":429,"NORP":427,"LANGUAGE":194,"ORDINAL":177,"LAW":137,"PERCENT":126,"TIME":103,"MONEY":65,"":1},"5":{"":1}}�cfg��neg_key�
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b0c198567f39ac6146d7716317c079ec7977eeac835e0e6d3fb926c4076d8e6
3
  size 34126801
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2f4155df88b3296a30602bab1237736752867c37007f3d4b05233c2e0c46780
3
  size 34126801
vocab/strings.json CHANGED
The diff for this file is too large to render. See raw diff