veryfansome committed on
Commit
a674fb1
·
verified ·
1 Parent(s): 45a789d

Upload 8 files

Browse files
models/ud_ewt_gum_20250304/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
models/ud_ewt_gum_20250304/config.json ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-base",
3
+ "architectures": [
4
+ "MultiHeadModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 3072,
12
+ "label_maps": {
13
+ "Case": [
14
+ "Acc",
15
+ "O",
16
+ "Nom"
17
+ ],
18
+ "Definite": [
19
+ "Ind",
20
+ "Def",
21
+ "O"
22
+ ],
23
+ "Degree": [
24
+ "Cmp",
25
+ "O",
26
+ "Sup",
27
+ "Pos"
28
+ ],
29
+ "Gender": [
30
+ "Fem",
31
+ "Neut",
32
+ "Masc",
33
+ "O"
34
+ ],
35
+ "Mood": [
36
+ "Ind",
37
+ "Imp",
38
+ "O"
39
+ ],
40
+ "NumType": [
41
+ "Ord",
42
+ "Card",
43
+ "Mult",
44
+ "O"
45
+ ],
46
+ "Number": [
47
+ "O",
48
+ "Plur",
49
+ "Sing"
50
+ ],
51
+ "Person": [
52
+ "1",
53
+ "O",
54
+ "3",
55
+ "2"
56
+ ],
57
+ "Poss": [
58
+ "Yes",
59
+ "O"
60
+ ],
61
+ "PronType": [
62
+ "O",
63
+ "Dem",
64
+ "Art",
65
+ "Rel",
66
+ "Prs",
67
+ "Int"
68
+ ],
69
+ "Reflex": [
70
+ "Yes",
71
+ "O"
72
+ ],
73
+ "Tense": [
74
+ "Past",
75
+ "O",
76
+ "Pres"
77
+ ],
78
+ "Typo": [
79
+ "Yes",
80
+ "O"
81
+ ],
82
+ "VerbForm": [
83
+ "O",
84
+ "Fin",
85
+ "Inf",
86
+ "Part",
87
+ "Ger"
88
+ ],
89
+ "deprel": [
90
+ "ccomp",
91
+ "punct",
92
+ "obl",
93
+ "flat",
94
+ "dislocated",
95
+ "mark",
96
+ "flat:foreign",
97
+ "obl:tmod",
98
+ "nmod:npmod",
99
+ "vocative",
100
+ "discourse",
101
+ "compound:prt",
102
+ "dep",
103
+ "reparandum",
104
+ "obj",
105
+ "parataxis",
106
+ "nsubj",
107
+ "nummod",
108
+ "acl",
109
+ "compound",
110
+ "csubj:pass",
111
+ "acl:relcl",
112
+ "aux",
113
+ "det:predet",
114
+ "csubj",
115
+ "list",
116
+ "amod",
117
+ "nmod:poss",
118
+ "cop",
119
+ "nsubj:pass",
120
+ "advcl",
121
+ "fixed",
122
+ "xcomp",
123
+ "nmod:tmod",
124
+ "cc",
125
+ "case",
126
+ "cc:preconj",
127
+ "obl:npmod",
128
+ "goeswith",
129
+ "appos",
130
+ "nmod",
131
+ "advmod",
132
+ "iobj",
133
+ "det",
134
+ "conj",
135
+ "root",
136
+ "orphan",
137
+ "expl",
138
+ "aux:pass"
139
+ ],
140
+ "xpos": [
141
+ ".",
142
+ "NN",
143
+ "RBS",
144
+ "PRP$",
145
+ "RBR",
146
+ ",",
147
+ ":",
148
+ "JJR",
149
+ "HYPH",
150
+ "POS",
151
+ "PDT",
152
+ "$",
153
+ "NFP",
154
+ "RP",
155
+ "JJ",
156
+ "CC",
157
+ "WP",
158
+ "WRB",
159
+ "FW",
160
+ "-RRB-",
161
+ "VBD",
162
+ "TO",
163
+ "NNPS",
164
+ "ADD",
165
+ "JJS",
166
+ "NNP",
167
+ "SYM",
168
+ "PRP",
169
+ "IN",
170
+ "``",
171
+ "VBN",
172
+ "WP$",
173
+ "RB",
174
+ "CD",
175
+ "EX",
176
+ "UH",
177
+ "VBP",
178
+ "-LRB-",
179
+ "VBZ",
180
+ "LS",
181
+ "DT",
182
+ "VBG",
183
+ "WDT",
184
+ "VB",
185
+ "NNS",
186
+ "MD",
187
+ "''"
188
+ ]
189
+ },
190
+ "layer_norm_eps": 1e-07,
191
+ "legacy": true,
192
+ "max_position_embeddings": 512,
193
+ "max_relative_positions": -1,
194
+ "model_type": "deberta-v2",
195
+ "norm_rel_ebd": "layer_norm",
196
+ "num_attention_heads": 12,
197
+ "num_hidden_layers": 12,
198
+ "num_labels_dict": {
199
+ "Case": 3,
200
+ "Definite": 3,
201
+ "Degree": 4,
202
+ "Gender": 4,
203
+ "Mood": 3,
204
+ "NumType": 4,
205
+ "Number": 3,
206
+ "Person": 4,
207
+ "Poss": 2,
208
+ "PronType": 6,
209
+ "Reflex": 2,
210
+ "Tense": 3,
211
+ "Typo": 2,
212
+ "VerbForm": 5,
213
+ "deprel": 49,
214
+ "xpos": 47
215
+ },
216
+ "pad_token_id": 0,
217
+ "pooler_dropout": 0,
218
+ "pooler_hidden_act": "gelu",
219
+ "pooler_hidden_size": 768,
220
+ "pos_att_type": [
221
+ "p2c",
222
+ "c2p"
223
+ ],
224
+ "position_biased_input": false,
225
+ "position_buckets": 256,
226
+ "relative_attention": true,
227
+ "share_att_key": true,
228
+ "torch_dtype": "float32",
229
+ "transformers_version": "4.49.0",
230
+ "type_vocab_size": 0,
231
+ "vocab_size": 128100
232
+ }
models/ud_ewt_gum_20250304/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1762a39ba8e270f196eaf66802fef7e8bc919d26c4f8f09ae6631d9b8ff8e4a2
3
+ size 735796144
models/ud_ewt_gum_20250304/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
models/ud_ewt_gum_20250304/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
models/ud_ewt_gum_20250304/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/ud_ewt_gum_20250304/tokenizer_config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "[PAD]",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "[CLS]",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "[SEP]",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "[UNK]",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "128000": {
37
+ "content": "[MASK]",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "[CLS]",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "[CLS]",
48
+ "do_lower_case": false,
49
+ "eos_token": "[SEP]",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "[MASK]",
52
+ "model_max_length": 1000000000000000019884624838656,
53
+ "pad_token": "[PAD]",
54
+ "sep_token": "[SEP]",
55
+ "sp_model_kwargs": {},
56
+ "split_by_punct": false,
57
+ "tokenizer_class": "DebertaV2Tokenizer",
58
+ "unk_token": "[UNK]",
59
+ "vocab_type": "spm"
60
+ }
models/ud_ewt_gum_20250304/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83dd99100603833a11b566e7e8548cd38446d713266596402b76d6fcbcd63ee0
3
+ size 5240