Commit
·
84c7b94
1
Parent(s):
0696e75
Dataset README.md
Browse files
dataset/ud_ewt_gum_pud_20250610/README.md
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Dataset
|
| 2 |
+
- xpos (language-specific part-of-speech tags, Penn Treebank-style; 48 labels)
|
| 3 |
+
- deprel (dependency relation labels; 43 labels)
- pos (upos: Universal POS tags; 17 labels)
|
| 4 |
+
- all morphological features except `Typo`, one column per feature (the label `X` marks tokens that do not carry the feature)
|
| 5 |
+
```text
|
| 6 |
+
DatasetDict({
|
| 7 |
+
test: Dataset({
|
| 8 |
+
features: ['text', 'tokens', 'xpos', 'deprel', 'pos', 'Abbr', 'Case', 'Definite', 'Degree', 'Foreign', 'Gender', 'Mood', 'NumType', 'Number', 'Person', 'Polarity', 'PronType', 'Poss', 'Reflex', 'Tense', 'VerbForm', 'Voice'],
|
| 9 |
+
num_rows: 3558
|
| 10 |
+
})
|
| 11 |
+
train: Dataset({
|
| 12 |
+
features: ['text', 'tokens', 'xpos', 'deprel', 'pos', 'Abbr', 'Case', 'Definite', 'Degree', 'Foreign', 'Gender', 'Mood', 'NumType', 'Number', 'Person', 'Polarity', 'PronType', 'Poss', 'Reflex', 'Tense', 'VerbForm', 'Voice'],
|
| 13 |
+
num_rows: 14120
|
| 14 |
+
})
|
| 15 |
+
validation: Dataset({
|
| 16 |
+
features: ['text', 'tokens', 'xpos', 'deprel', 'pos', 'Abbr', 'Case', 'Definite', 'Degree', 'Foreign', 'Gender', 'Mood', 'NumType', 'Number', 'Person', 'Polarity', 'PronType', 'Poss', 'Reflex', 'Tense', 'VerbForm', 'Voice'],
|
| 17 |
+
num_rows: 2393
|
| 18 |
+
})
|
| 19 |
+
})
|
| 20 |
+
2025-06-10 23:43:19,516 - utils - INFO - Columns:
|
| 21 |
+
2025-06-10 23:43:19,516 - utils - INFO - xpos:
|
| 22 |
+
2025-06-10 23:43:19,516 - utils - INFO - 48 labels: ['$:275', "'':1460", ',:13128', '-LRB-:1319', '-RRB-:1352', '.:16357', '::1497', 'ADD:395', 'AFX:43', 'CC:9949', 'CD:6121', 'DT:26998', 'EX:567', 'FW:254', 'HYPH:782', 'IN:33343', 'JJ:19114', 'JJR:765', 'JJS:651', 'LS:103', 'MD:3998', 'NFP:407', 'NN:41822', 'NNP:21351', 'NNPS:859', 'NNS:14408', 'PDT:244', 'POS:557', 'PRP:14822', 'PRP$:4539', 'RB:13743', 'RBR:433', 'RBS:259', 'RP:1165', 'SYM:303', 'TO:4542', 'UH:860', 'VB:12293', 'VBD:8903', 'VBG:5020', 'VBN:6307', 'VBP:6780', 'VBZ:6609', 'WDT:1475', 'WP:1039', 'WP$:32', 'WRB:1287', '``:1487']
|
| 23 |
+
2025-06-10 23:43:19,516 - utils - INFO - deprel:
|
| 24 |
+
2025-06-10 23:43:19,516 - utils - INFO - 43 labels: ['acl:2524', 'acl:relcl:2838', 'advcl:5243', 'advmod:13863', 'amod:16158', 'appos:2475', 'aux:7724', 'aux:pass:2392', 'case:28570', 'cc:10041', 'cc:preconj:157', 'ccomp:2899', 'compound:12556', 'compound:prt:1174', 'conj:11395', 'cop:5901', 'csubj:424', 'csubj:pass:15', 'det:25538', 'det:predet:234', 'discourse:829', 'expl:863', 'fixed:831', 'flat:2842', 'iobj:461', 'list:794', 'mark:10468', 'nmod:12279', 'nmod:npmod:253', 'nmod:poss:5070', 'nmod:tmod:597', 'nsubj:22425', 'nsubj:pass:2098', 'nummod:3497', 'obj:14786', 'obl:14782', 'obl:npmod:536', 'obl:tmod:670', 'parataxis:1724', 'punct:37522', 'root:20071', 'vocative:164', 'xcomp:4334']
|
| 25 |
+
2025-06-10 23:43:19,516 - utils - INFO - pos:
|
| 26 |
+
2025-06-10 23:43:19,516 - utils - INFO - 17 labels: ['ADJ:20532', 'ADP:29378', 'ADV:13451', 'AUX:16085', 'CCONJ:9954', 'DET:26084', 'INTJ:861', 'NOUN:55424', 'NUM:6126', 'PART:6524', 'PRON:24115', 'PROPN:22381', 'PUNCT:37650', 'SCONJ:6038', 'SYM:826', 'VERB:33812', 'X:776']
|
| 27 |
+
2025-06-10 23:43:19,516 - utils - INFO - Abbr:
|
| 28 |
+
2025-06-10 23:43:19,516 - utils - INFO - 2 labels: ['X:309917', 'Yes:100']
|
| 29 |
+
2025-06-10 23:43:19,516 - utils - INFO - Case:
|
| 30 |
+
2025-06-10 23:43:19,516 - utils - INFO - 3 labels: ['Acc:3192', 'Nom:11514', 'X:295311']
|
| 31 |
+
2025-06-10 23:43:19,516 - utils - INFO - Definite:
|
| 32 |
+
2025-06-10 23:43:19,516 - utils - INFO - 3 labels: ['Def:15422', 'Ind:6627', 'X:287968']
|
| 33 |
+
2025-06-10 23:43:19,516 - utils - INFO - Degree:
|
| 34 |
+
2025-06-10 23:43:19,516 - utils - INFO - 4 labels: ['Cmp:978', 'Pos:19676', 'Sup:747', 'X:288616']
|
| 35 |
+
2025-06-10 23:43:19,516 - utils - INFO - Foreign:
|
| 36 |
+
2025-06-10 23:43:19,516 - utils - INFO - 2 labels: ['X:310006', 'Yes:11']
|
| 37 |
+
2025-06-10 23:43:19,516 - utils - INFO - Gender:
|
| 38 |
+
2025-06-10 23:43:19,516 - utils - INFO - 4 labels: ['Fem:983', 'Masc:2353', 'Neut:2707', 'X:303974']
|
| 39 |
+
2025-06-10 23:43:19,516 - utils - INFO - Mood:
|
| 40 |
+
2025-06-10 23:43:19,516 - utils - INFO - 3 labels: ['Imp:1715', 'Ind:22291', 'X:286011']
|
| 41 |
+
2025-06-10 23:43:19,516 - utils - INFO - NumType:
|
| 42 |
+
2025-06-10 23:43:19,516 - utils - INFO - 4 labels: ['Card:6121', 'Mult:87', 'Ord:444', 'X:303365']
|
| 43 |
+
2025-06-10 23:43:19,516 - utils - INFO - Number:
|
| 44 |
+
2025-06-10 23:43:19,516 - utils - INFO - 3 labels: ['Plur:20204', 'Sing:85399', 'X:204414']
|
| 45 |
+
2025-06-10 23:43:19,516 - utils - INFO - Person:
|
| 46 |
+
2025-06-10 23:43:19,516 - utils - INFO - 4 labels: ['1:7370', '2:3672', '3:17016', 'X:281959']
|
| 47 |
+
2025-06-10 23:43:19,516 - utils - INFO - Polarity:
|
| 48 |
+
2025-06-10 23:43:19,516 - utils - INFO - 2 labels: ['Neg:1035', 'X:308982']
|
| 49 |
+
2025-06-10 23:43:19,516 - utils - INFO - PronType:
|
| 50 |
+
2025-06-10 23:43:19,516 - utils - INFO - 6 labels: ['Art:22049', 'Dem:3549', 'Int:1991', 'Prs:19246', 'Rel:1790', 'X:261392']
|
| 51 |
+
2025-06-10 23:43:19,516 - utils - INFO - Poss:
|
| 52 |
+
2025-06-10 23:43:19,516 - utils - INFO - 2 labels: ['X:305484', 'Yes:4533']
|
| 53 |
+
2025-06-10 23:43:19,516 - utils - INFO - Reflex:
|
| 54 |
+
2025-06-10 23:43:19,516 - utils - INFO - 2 labels: ['X:309847', 'Yes:170']
|
| 55 |
+
2025-06-10 23:43:19,516 - utils - INFO - Tense:
|
| 56 |
+
2025-06-10 23:43:19,516 - utils - INFO - 3 labels: ['Past:15210', 'Pres:14396', 'X:280411']
|
| 57 |
+
2025-06-10 23:43:19,516 - utils - INFO - VerbForm:
|
| 58 |
+
2025-06-10 23:43:19,516 - utils - INFO - 5 labels: ['Fin:28004', 'Ger:4010', 'Inf:10579', 'Part:7315', 'X:260109']
|
| 59 |
+
2025-06-10 23:43:19,516 - utils - INFO - Voice:
|
| 60 |
+
2025-06-10 23:43:19,516 - utils - INFO - 2 labels: ['Pass:1118', 'X:308899']
|
| 61 |
+
```
|