sowallah committed on
Commit
a0a45e9
·
verified ·
1 Parent(s): 5168990

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ luo-pos/tokenizer.json filter=lfs diff=lfs merge=lfs -text
luo-pos/config.json ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaForTokenClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 1024,
13
+ "id2label": {
14
+ "0": ",",
15
+ "1": ".",
16
+ "2": "ABBR",
17
+ "3": "AD V",
18
+ "4": "ADJ",
19
+ "5": "ADO",
20
+ "6": "ADP",
21
+ "7": "ADV",
22
+ "8": "ADp",
23
+ "9": "ADv",
24
+ "10": "AP",
25
+ "11": "AUX",
26
+ "12": "Aadv",
27
+ "13": "Ad",
28
+ "14": "AdJ",
29
+ "15": "AdV",
30
+ "16": "Adcv",
31
+ "17": "Adj",
32
+ "18": "Adj+NN+Adj",
33
+ "19": "Adj+Prep",
34
+ "20": "Adj+V",
35
+ "21": "Adj.",
36
+ "22": "Adp",
37
+ "23": "Adsv",
38
+ "24": "Adv",
39
+ "25": "Adv+NN",
40
+ "26": "Adv+V",
41
+ "27": "Adv.",
42
+ "28": "Aj",
43
+ "29": "C",
44
+ "30": "CO",
45
+ "31": "COJ",
46
+ "32": "CONG",
47
+ "33": "CONJ",
48
+ "34": "CONJ.",
49
+ "35": "Cinj",
50
+ "36": "Coinj",
51
+ "37": "Con",
52
+ "38": "ConJ",
53
+ "39": "Conj",
54
+ "40": "Conj+NN",
55
+ "41": "Conj+NN+Adj",
56
+ "42": "Conj+PRON",
57
+ "43": "Conj+V",
58
+ "44": "Conj.",
59
+ "45": "D",
60
+ "46": "DET",
61
+ "47": "DeT",
62
+ "48": "Der",
63
+ "49": "Det",
64
+ "50": "Det+Prep",
65
+ "51": "Det.",
66
+ "52": "DetV",
67
+ "53": "Detr",
68
+ "54": "INTENS",
69
+ "55": "INTER",
70
+ "56": "MM",
71
+ "57": "N",
72
+ "58": "N N",
73
+ "59": "NN",
74
+ "60": "NN /ADJ",
75
+ "61": "NN+Adj",
76
+ "62": "NN+Conj",
77
+ "63": "NN+Det",
78
+ "64": "NN+Prep",
79
+ "65": "NNADP",
80
+ "66": "NNN",
81
+ "67": "NNV",
82
+ "68": "NU",
83
+ "69": "NU M",
84
+ "70": "NUM",
85
+ "71": "NUMB",
86
+ "72": "NUMM",
87
+ "73": "NUm",
88
+ "74": "Nn",
89
+ "75": "Num",
90
+ "76": "P",
91
+ "77": "PART",
92
+ "78": "PR",
93
+ "79": "PRE",
94
+ "80": "PRO",
95
+ "81": "PROIN",
96
+ "82": "PRON",
97
+ "83": "PRONV",
98
+ "84": "PROn",
99
+ "85": "PU",
100
+ "86": "PUCT",
101
+ "87": "PUNCT",
102
+ "88": "PUNCt",
103
+ "89": "PUNT",
104
+ "90": "Prep",
105
+ "91": "Prep+NN",
106
+ "92": "Pron",
107
+ "93": "Punct",
108
+ "94": "QADP",
109
+ "95": "S",
110
+ "96": "SPEC",
111
+ "97": "V",
112
+ "98": "V+Adv",
113
+ "99": "V+NN",
114
+ "100": "VADP",
115
+ "101": "VV",
116
+ "102": "X",
117
+ "103": "XX",
118
+ "104": "aAdj",
119
+ "105": "adj",
120
+ "106": "adp",
121
+ "107": "conj",
122
+ "108": "conj.",
123
+ "109": "cv",
124
+ "110": "nn",
125
+ "111": "punct",
126
+ "112": "v",
127
+ "113": "x"
128
+ },
129
+ "initializer_range": 0.02,
130
+ "intermediate_size": 4096,
131
+ "label2id": {
132
+ ",": 0,
133
+ ".": 1,
134
+ "ABBR": 2,
135
+ "AD V": 3,
136
+ "ADJ": 4,
137
+ "ADO": 5,
138
+ "ADP": 6,
139
+ "ADV": 7,
140
+ "ADp": 8,
141
+ "ADv": 9,
142
+ "AP": 10,
143
+ "AUX": 11,
144
+ "Aadv": 12,
145
+ "Ad": 13,
146
+ "AdJ": 14,
147
+ "AdV": 15,
148
+ "Adcv": 16,
149
+ "Adj": 17,
150
+ "Adj+NN+Adj": 18,
151
+ "Adj+Prep": 19,
152
+ "Adj+V": 20,
153
+ "Adj.": 21,
154
+ "Adp": 22,
155
+ "Adsv": 23,
156
+ "Adv": 24,
157
+ "Adv+NN": 25,
158
+ "Adv+V": 26,
159
+ "Adv.": 27,
160
+ "Aj": 28,
161
+ "C": 29,
162
+ "CO": 30,
163
+ "COJ": 31,
164
+ "CONG": 32,
165
+ "CONJ": 33,
166
+ "CONJ.": 34,
167
+ "Cinj": 35,
168
+ "Coinj": 36,
169
+ "Con": 37,
170
+ "ConJ": 38,
171
+ "Conj": 39,
172
+ "Conj+NN": 40,
173
+ "Conj+NN+Adj": 41,
174
+ "Conj+PRON": 42,
175
+ "Conj+V": 43,
176
+ "Conj.": 44,
177
+ "D": 45,
178
+ "DET": 46,
179
+ "DeT": 47,
180
+ "Der": 48,
181
+ "Det": 49,
182
+ "Det+Prep": 50,
183
+ "Det.": 51,
184
+ "DetV": 52,
185
+ "Detr": 53,
186
+ "INTENS": 54,
187
+ "INTER": 55,
188
+ "MM": 56,
189
+ "N": 57,
190
+ "N N": 58,
191
+ "NN": 59,
192
+ "NN /ADJ": 60,
193
+ "NN+Adj": 61,
194
+ "NN+Conj": 62,
195
+ "NN+Det": 63,
196
+ "NN+Prep": 64,
197
+ "NNADP": 65,
198
+ "NNN": 66,
199
+ "NNV": 67,
200
+ "NU": 68,
201
+ "NU M": 69,
202
+ "NUM": 70,
203
+ "NUMB": 71,
204
+ "NUMM": 72,
205
+ "NUm": 73,
206
+ "Nn": 74,
207
+ "Num": 75,
208
+ "P": 76,
209
+ "PART": 77,
210
+ "PR": 78,
211
+ "PRE": 79,
212
+ "PRO": 80,
213
+ "PROIN": 81,
214
+ "PRON": 82,
215
+ "PRONV": 83,
216
+ "PROn": 84,
217
+ "PU": 85,
218
+ "PUCT": 86,
219
+ "PUNCT": 87,
220
+ "PUNCt": 88,
221
+ "PUNT": 89,
222
+ "Prep": 90,
223
+ "Prep+NN": 91,
224
+ "Pron": 92,
225
+ "Punct": 93,
226
+ "QADP": 94,
227
+ "S": 95,
228
+ "SPEC": 96,
229
+ "V": 97,
230
+ "V+Adv": 98,
231
+ "V+NN": 99,
232
+ "VADP": 100,
233
+ "VV": 101,
234
+ "X": 102,
235
+ "XX": 103,
236
+ "aAdj": 104,
237
+ "adj": 105,
238
+ "adp": 106,
239
+ "conj": 107,
240
+ "conj.": 108,
241
+ "cv": 109,
242
+ "nn": 110,
243
+ "punct": 111,
244
+ "v": 112,
245
+ "x": 113
246
+ },
247
+ "layer_norm_eps": 1e-05,
248
+ "max_position_embeddings": 514,
249
+ "model_type": "xlm-roberta",
250
+ "num_attention_heads": 16,
251
+ "num_hidden_layers": 24,
252
+ "output_past": true,
253
+ "pad_token_id": 1,
254
+ "position_embedding_type": "absolute",
255
+ "transformers_version": "4.57.6",
256
+ "type_vocab_size": 1,
257
+ "use_cache": true,
258
+ "vocab_size": 250002
259
+ }
luo-pos/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e41d6f76bd270b6936d3550063951ea68a6e258150e892072a230840f0d2f610
3
+ size 2235879264
luo-pos/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
luo-pos/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8373f9cd3d27591e1924426bcc1c8799bc5a9affc4fc857982c5d66668dd1f41
3
+ size 17082832
luo-pos/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "model_max_length": 512,
51
+ "pad_token": "<pad>",
52
+ "sep_token": "</s>",
53
+ "tokenizer_class": "XLMRobertaTokenizerFast",
54
+ "unk_token": "<unk>"
55
+ }
luo-pos/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:402c67eb466751ce81177fdf50cf51e9adba27c7ff752a31707a0743a6a4c1d4
3
+ size 5432