miyachun committed on
Commit
459612d
·
1 Parent(s): d41e4dd

Upload 15 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model-last/tok2vec/model filter=lfs diff=lfs merge=lfs -text
37
+ model-last/vocab/key2row filter=lfs diff=lfs merge=lfs -text
38
+ model-last/vocab/strings.json filter=lfs diff=lfs merge=lfs -text
39
+ model-last/vocab/vectors filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Gradio demo that runs a spaCy NER pipeline over user-supplied text."""
from pathlib import Path

import gradio as gr
import spacy

# Resolve the model directory relative to this file so that loading does not
# depend on the process's current working directory.
_MODEL_DIR = Path(__file__).resolve().parent / "model-last"
nlp = spacy.load(_MODEL_DIR)


def ner(sentence):
    """Return the named entities the loaded pipeline finds in *sentence*.

    Args:
        sentence: Text to run through ``nlp``.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, one per span in
        ``doc.ents``.
    """
    doc = nlp(sentence)
    return [(ent.text, ent.label_) for ent in doc.ents]


# Passed to the interface as its description; lists the place names that
# belong to each of the GPE01 / GPE02 / GPE03 labels.
desc = "(GPE01)->臺北市、新北市、桃園市、臺中市、臺南市、高雄市。(GPE02)->新竹縣、苗栗縣、彰化縣、南投縣、雲林縣、嘉義縣、屏東縣、宜蘭縣、花蓮縣、臺東縣、澎湖縣、金門縣、連江縣。(GPE03)->基隆市、新竹市、嘉義市。"
demo = gr.Interface(fn=ner, inputs="text", description=desc, outputs="text")

# Launch exactly once, and only when run as a script, so that importing this
# module does not start a server as a side effect.
if __name__ == "__main__":
    demo.launch()
model-last/config.cfg ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [paths]
2
+ train = "./train.spacy"
3
+ dev = "./train.spacy"
4
+ vectors = "zh_core_web_lg"
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ gpu_allocator = null
9
+ seed = 0
10
+
11
+ [nlp]
12
+ lang = "zh"
13
+ pipeline = ["tok2vec","ner"]
14
+ batch_size = 1000
15
+ disabled = []
16
+ before_creation = null
17
+ after_creation = null
18
+ after_pipeline_creation = null
19
+ vectors = {"@vectors":"spacy.Vectors.v1"}
20
+
21
+ [nlp.tokenizer]
22
+ @tokenizers = "spacy.zh.ChineseTokenizer"
23
+ segmenter = "char"
24
+
25
+ [components]
26
+
27
+ [components.ner]
28
+ factory = "ner"
29
+ incorrect_spans_key = null
30
+ moves = null
31
+ scorer = {"@scorers":"spacy.ner_scorer.v1"}
32
+ update_with_oracle_cut_size = 100
33
+
34
+ [components.ner.model]
35
+ @architectures = "spacy.TransitionBasedParser.v2"
36
+ state_type = "ner"
37
+ extra_state_tokens = false
38
+ hidden_width = 64
39
+ maxout_pieces = 2
40
+ use_upper = true
41
+ nO = null
42
+
43
+ [components.ner.model.tok2vec]
44
+ @architectures = "spacy.Tok2VecListener.v1"
45
+ width = ${components.tok2vec.model.encode.width}
46
+ upstream = "*"
47
+
48
+ [components.tok2vec]
49
+ factory = "tok2vec"
50
+
51
+ [components.tok2vec.model]
52
+ @architectures = "spacy.Tok2Vec.v2"
53
+
54
+ [components.tok2vec.model.embed]
55
+ @architectures = "spacy.MultiHashEmbed.v2"
56
+ width = ${components.tok2vec.model.encode.width}
57
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
58
+ rows = [5000,1000,2500,2500]
59
+ include_static_vectors = true
60
+
61
+ [components.tok2vec.model.encode]
62
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
63
+ width = 256
64
+ depth = 8
65
+ window_size = 1
66
+ maxout_pieces = 3
67
+
68
+ [corpora]
69
+
70
+ [corpora.dev]
71
+ @readers = "spacy.Corpus.v1"
72
+ path = ${paths.dev}
73
+ max_length = 0
74
+ gold_preproc = false
75
+ limit = 0
76
+ augmenter = null
77
+
78
+ [corpora.train]
79
+ @readers = "spacy.Corpus.v1"
80
+ path = ${paths.train}
81
+ max_length = 0
82
+ gold_preproc = false
83
+ limit = 0
84
+ augmenter = null
85
+
86
+ [training]
87
+ dev_corpus = "corpora.dev"
88
+ train_corpus = "corpora.train"
89
+ seed = ${system.seed}
90
+ gpu_allocator = ${system.gpu_allocator}
91
+ dropout = 0.1
92
+ accumulate_gradient = 1
93
+ patience = 1600
94
+ max_epochs = 0
95
+ max_steps = 20000
96
+ eval_frequency = 200
97
+ frozen_components = []
98
+ annotating_components = []
99
+ before_to_disk = null
100
+ before_update = null
101
+
102
+ [training.batcher]
103
+ @batchers = "spacy.batch_by_words.v1"
104
+ discard_oversize = false
105
+ tolerance = 0.2
106
+ get_length = null
107
+
108
+ [training.batcher.size]
109
+ @schedules = "compounding.v1"
110
+ start = 100
111
+ stop = 1000
112
+ compound = 1.001
113
+ t = 0.0
114
+
115
+ [training.logger]
116
+ @loggers = "spacy.ConsoleLogger.v1"
117
+ progress_bar = false
118
+
119
+ [training.optimizer]
120
+ @optimizers = "Adam.v1"
121
+ beta1 = 0.9
122
+ beta2 = 0.999
123
+ L2_is_weight_decay = true
124
+ L2 = 0.01
125
+ grad_clip = 1.0
126
+ use_averages = false
127
+ eps = 0.00000001
128
+ learn_rate = 0.001
129
+
130
+ [training.score_weights]
131
+ ents_f = 1.0
132
+ ents_p = 0.0
133
+ ents_r = 0.0
134
+ ents_per_type = null
135
+
136
+ [pretraining]
137
+
138
+ [initialize]
139
+ vectors = ${paths.vectors}
140
+ init_tok2vec = ${paths.init_tok2vec}
141
+ vocab_data = null
142
+ lookups = null
143
+ before_init = null
144
+ after_init = null
145
+
146
+ [initialize.components]
147
+
148
+ [initialize.tokenizer]
149
+ pkuseg_model = null
150
+ pkuseg_user_dict = "default"
model-last/meta.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lang":"zh",
3
+ "name":"pipeline",
4
+ "version":"0.0.0",
5
+ "spacy_version":">=3.7.2,<3.8.0",
6
+ "description":"",
7
+ "author":"",
8
+ "email":"",
9
+ "url":"",
10
+ "license":"",
11
+ "spacy_git_version":"a89eae928",
12
+ "vectors":{
13
+ "width":300,
14
+ "vectors":500000,
15
+ "keys":500000,
16
+ "name":"zh_vectors",
17
+ "mode":"default"
18
+ },
19
+ "labels":{
20
+ "tok2vec":[
21
+
22
+ ],
23
+ "ner":[
24
+ "GPE01",
25
+ "GPE02",
26
+ "GPE03"
27
+ ]
28
+ },
29
+ "pipeline":[
30
+ "tok2vec",
31
+ "ner"
32
+ ],
33
+ "components":[
34
+ "tok2vec",
35
+ "ner"
36
+ ],
37
+ "disabled":[
38
+
39
+ ],
40
+ "performance":{
41
+ "ents_f":1.0,
42
+ "ents_p":1.0,
43
+ "ents_r":1.0,
44
+ "ents_per_type":{
45
+ "GPE01":{
46
+ "p":1.0,
47
+ "r":1.0,
48
+ "f":1.0
49
+ },
50
+ "GPE02":{
51
+ "p":1.0,
52
+ "r":1.0,
53
+ "f":1.0
54
+ },
55
+ "GPE03":{
56
+ "p":1.0,
57
+ "r":1.0,
58
+ "f":1.0
59
+ }
60
+ },
61
+ "tok2vec_loss":0.0000000001,
62
+ "ner_loss":0.0000000016
63
+ }
64
+ }
model-last/ner/cfg ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "moves":null,
3
+ "update_with_oracle_cut_size":100,
4
+ "multitasks":[
5
+
6
+ ],
7
+ "min_action_freq":1,
8
+ "learn_tokens":false,
9
+ "beam_width":1,
10
+ "beam_density":0.0,
11
+ "beam_update_prob":0.0,
12
+ "incorrect_spans_key":null
13
+ }
model-last/ner/model ADDED
Binary file (171 kB). View file
 
model-last/ner/moves ADDED
@@ -0,0 +1 @@
 
 
1
+ ��movesٰ{"0":{},"1":{"GPE02":39,"GPE01":18,"GPE03":9},"2":{"GPE02":39,"GPE01":18,"GPE03":9},"3":{"GPE02":39,"GPE01":18,"GPE03":9},"4":{"GPE02":39,"GPE01":18,"GPE03":9,"":1},"5":{"":1}}�cfg��neg_key�
model-last/tok2vec/cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+
3
+ }
model-last/tok2vec/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8098b73e264c5bd0436ce0ba7dfebe6fa480c8816a13e6e93390984f0184935d
3
+ size 34434008
model-last/tokenizer/cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "segmenter":"char"
3
+ }
model-last/vocab/key2row ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc5743a5bd0280a49eaf03e67ecb1a1fa93f04080c8d5802c4562e5004f46c48
3
+ size 6868315
model-last/vocab/lookups.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
3
+ size 1
model-last/vocab/strings.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4701b348a784457e18cdd9c993c5d8c5ea0875785776356c77c61d2d0eaf67d2
3
+ size 10515213
model-last/vocab/vectors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:761b22330b44dfde9c65f6646d02c785e3935b34410802e4fc9297ca3b5ba3f6
3
+ size 600000128
model-last/vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "mode":"default"
3
+ }
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
# Keep spaCy inside the range recorded as "spacy_version" in
# model-last/meta.json.
spacy>=3.7.2,<3.8.0