E-katrin committed · verified
Commit 53b1bdc · 1 parent: d1978f6

Model save
README.md ADDED
@@ -0,0 +1,74 @@
+ ---
+ base_model: xlm-roberta-base
+ datasets: E-katrin/train20
+ language: sv
+ library_name: transformers
+ license: gpl-3.0
+ metrics:
+ - accuracy
+ - f1
+ pipeline_tag: token-classification
+ tags:
+ - pytorch
+ model-index:
+ - name: E-katrin/train20_10e-5_10ep
+   results:
+   - task:
+       type: token-classification
+     dataset:
+       name: train20
+       type: E-katrin/train20
+       split: validation
+     metrics:
+     - type: f1
+       value: 0.7334744654028211
+       name: Null F1
+     - type: f1
+       value: 0.014846159776685144
+       name: Lemma F1
+     - type: f1
+       value: 0.04934241130226303
+       name: Morphology F1
+     - type: accuracy
+       value: 0.5646359583952452
+       name: Ud Jaccard
+     - type: accuracy
+       value: 0.39341205717837163
+       name: Eud Jaccard
+     - type: f1
+       value: 0.7448370725028419
+       name: Miscs F1
+     - type: f1
+       value: 0.427309181058314
+       name: Deepslot F1
+     - type: f1
+       value: 0.3632536407434294
+       name: Semclass F1
+ ---
+
+ # Model Card for train20_10e-5_10ep
+
+ A transformer-based multihead parser for CoBaLD annotation.
+
+ This model parses pre-tokenized CoNLL-U text and jointly labels each token with three tiers of tags:
+ * Grammatical tags (lemma, UPOS, XPOS, morphological features),
+ * Syntactic tags (basic and enhanced Universal Dependencies),
+ * Semantic tags (deep slot and semantic class).
+
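The three tiers can be pictured on a single token. The sketch below uses hypothetical tag values; the field names follow CoNLL-U plus the CoBaLD semantic columns and are illustrative, not the model's exact output format:

```python
# Joint annotation of one token across the three tiers (hypothetical values).
token_annotation = {
    "form": "katten",  # surface token
    # Grammatical tier
    "lemma": "katt",
    "upos": "NOUN",
    "feats": "Definite=Def|Gender=Com|Number=Sing",
    # Syntactic tier: basic and enhanced Universal Dependencies
    "deprel": "nsubj",
    "deps": "2:nsubj",
    # Semantic tier
    "deepslot": "Agent",
    "semclass": "ANIMAL",
}

# All tiers are predicted jointly for each token by the multihead parser.
print(sorted(token_annotation))
```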
+ ## Model Sources
+
+ - **Repository:** https://github.com/CobaldAnnotation/CobaldParser
+ - **Paper:** https://dialogue-conf.org/wp-content/uploads/2025/04/BaiukIBaiukAPetrovaM.009.pdf
+ - **Demo:** [coming soon]
+
+ ## Citation
+
+ ```bibtex
+ @inproceedings{baiuk2025cobald,
+   title={CoBaLD Parser: Joint Morphosyntactic and Semantic Annotation},
+   author={Baiuk, Ilia and Baiuk, Alexandra and Petrova, Maria},
+   booktitle={Proceedings of the International Conference "Dialogue"},
+   volume={I},
+   year={2025}
+ }
+ ```
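The `Ud Jaccard` and `Eud Jaccard` figures reported in the model card metadata are accuracy-style set-overlap scores between predicted and gold dependency structures. A minimal sketch of Jaccard similarity over dependency arcs (the exact arc representation used in evaluation is an assumption):

```python
def jaccard(pred, gold):
    """Jaccard similarity between two sets of dependency arcs."""
    pred, gold = set(pred), set(gold)
    if not pred and not gold:
        return 1.0
    return len(pred & gold) / len(pred | gold)

# Arcs as (head, dependent, relation) triples; hypothetical example
# where the parser mislabels one relation.
gold_arcs = {(0, 2, "root"), (2, 1, "nsubj"), (2, 3, "obj")}
pred_arcs = {(0, 2, "root"), (2, 1, "nsubj"), (2, 3, "iobj")}
print(jaccard(pred_arcs, gold_arcs))  # 2 shared / 4 distinct = 0.5
```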
config.json ADDED
@@ -0,0 +1,1928 @@
1
+ {
2
+ "activation": "relu",
3
+ "architectures": [
4
+ "CobaldParser"
5
+ ],
6
+ "auto_map": {
7
+ "AutoConfig": "configuration.CobaldParserConfig",
8
+ "AutoModel": "modeling_parser.CobaldParser"
9
+ },
10
+ "consecutive_null_limit": 3,
11
+ "deepslot_classifier_hidden_size": 256,
12
+ "dependency_classifier_hidden_size": 128,
13
+ "dropout": 0.1,
14
+ "encoder_model_name": "xlm-roberta-base",
15
+ "lemma_classifier_hidden_size": 512,
16
+ "misc_classifier_hidden_size": 512,
17
+ "model_type": "cobald_parser",
18
+ "morphology_classifier_hidden_size": 512,
19
+ "null_classifier_hidden_size": 512,
20
+ "semclass_classifier_hidden_size": 512,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.52.2",
23
+ "vocabulary": {
24
+ "deepslot": {
25
+ "0": "$Dislocation",
26
+ "1": "Addition",
27
+ "2": "AdditionalParticipant",
28
+ "3": "Addressee",
29
+ "4": "Addressee_Metaphoric",
30
+ "5": "Agent",
31
+ "6": "Agent_Metaphoric",
32
+ "7": "AttachedProperty",
33
+ "8": "BehalfOfEntity",
34
+ "9": "BeneMalefactive",
35
+ "10": "Causator",
36
+ "11": "Cause",
37
+ "12": "Ch_Parameter",
38
+ "13": "Ch_Reference",
39
+ "14": "Characteristic",
40
+ "15": "Chemical_Composite",
41
+ "16": "ClassifiedEntity",
42
+ "17": "Comparison",
43
+ "18": "ComparisonBase",
44
+ "19": "Comparison_Symmetrical",
45
+ "20": "Composition",
46
+ "21": "Concession",
47
+ "22": "ConcessiveCondition",
48
+ "23": "Concurrent",
49
+ "24": "Concurrent_Complement",
50
+ "25": "Condition",
51
+ "26": "Consequence",
52
+ "27": "ContentOfContainer",
53
+ "28": "ContrAgent",
54
+ "29": "ContrAgent_Metaphoric",
55
+ "30": "ContrObject",
56
+ "31": "Core_Hyphen_Component",
57
+ "32": "Correlative",
58
+ "33": "Criterion",
59
+ "34": "Degree",
60
+ "35": "DegreeNumerative",
61
+ "36": "Dependent_Hyphen_Component",
62
+ "37": "Elective",
63
+ "38": "Empty_Subject_It",
64
+ "39": "Experiencer",
65
+ "40": "Experiencer_Metaphoric",
66
+ "41": "Explication",
67
+ "42": "Fabricative",
68
+ "43": "FormOfRepresentation",
69
+ "44": "Function",
70
+ "45": "GappingRemnant",
71
+ "46": "Instrument",
72
+ "47": "Instrument_Situation",
73
+ "48": "Interval_Beginning",
74
+ "49": "Interval_End",
75
+ "50": "Landmark",
76
+ "51": "Limitation",
77
+ "52": "Locative",
78
+ "53": "Locative_Distance",
79
+ "54": "Locative_FinalPoint",
80
+ "55": "Locative_InitialPoint",
81
+ "56": "Locative_Route",
82
+ "57": "Manner",
83
+ "58": "MannerOfPositionAndMotion",
84
+ "59": "Manner_Configuration",
85
+ "60": "Manner_Reduplication",
86
+ "61": "MathCharacteristic",
87
+ "62": "MeasureSpecification",
88
+ "63": "Member",
89
+ "64": "MetaphoricLocative",
90
+ "65": "Metaphoric_FinalPoint",
91
+ "66": "Metaphoric_InitialPoint",
92
+ "67": "Metaphoric_Route",
93
+ "68": "Motive",
94
+ "69": "Motive_Warranty",
95
+ "70": "MovingLandmark",
96
+ "71": "Name_Title",
97
+ "72": "Object",
98
+ "73": "Object_Relation",
99
+ "74": "Object_Situation",
100
+ "75": "OneAnother",
101
+ "76": "Opposition",
102
+ "77": "OrderInTimeAndSpace",
103
+ "78": "Original_Object",
104
+ "79": "Original_Situation",
105
+ "80": "Parenthetical",
106
+ "81": "Part",
107
+ "82": "PartAsOrientation",
108
+ "83": "Part_Situation",
109
+ "84": "ParticipleRelativeClause",
110
+ "85": "Particles_Accentuation",
111
+ "86": "PaymentBy_NonMonetaryUnits",
112
+ "87": "PersonImplicit",
113
+ "88": "PlaceOfContact",
114
+ "89": "Possessor",
115
+ "90": "Possessor_Locative",
116
+ "91": "Possessor_Metaphoric",
117
+ "92": "Possessor_Situational",
118
+ "93": "PragmaticEvaluation",
119
+ "94": "Predicate",
120
+ "95": "Predicate_Adverb",
121
+ "96": "Predicate_DiscoursiveUnits",
122
+ "97": "Predicate_Noun",
123
+ "98": "PrincipleOfOrganization",
124
+ "99": "Proportion_FirstComponent",
125
+ "100": "Proportion_To",
126
+ "101": "Purpose",
127
+ "102": "Purpose_Distributive",
128
+ "103": "QuantifiedEntity",
129
+ "104": "Quantity",
130
+ "105": "Quantity_Pragmatic",
131
+ "106": "Raising_Target",
132
+ "107": "Relative",
133
+ "108": "Resultative",
134
+ "109": "Route_Situation",
135
+ "110": "SetEnvironment",
136
+ "111": "Set_Classification",
137
+ "112": "Set_General",
138
+ "113": "Source",
139
+ "114": "Specification",
140
+ "115": "Specifier_Number",
141
+ "116": "Spectator",
142
+ "117": "SpeechEtiquette",
143
+ "118": "Sphere",
144
+ "119": "StaffOfPossessors",
145
+ "120": "Standpoint",
146
+ "121": "State",
147
+ "122": "Stimulus",
148
+ "123": "SupportedEntity",
149
+ "124": "TagQuestion",
150
+ "125": "TagSubject",
151
+ "126": "Theme",
152
+ "127": "ThemeRhematic",
153
+ "128": "Time",
154
+ "129": "Vocative",
155
+ "130": "Vocative_Metaphoric",
156
+ "131": "Whole",
157
+ "132": "Whole_Complement",
158
+ "133": "_"
159
+ },
160
+ "eud_deprel": {
161
+ "0": "acl",
162
+ "1": "acl:about",
163
+ "2": "acl:about_whether",
164
+ "3": "acl:after",
165
+ "4": "acl:against",
166
+ "5": "acl:as",
167
+ "6": "acl:as_if",
168
+ "7": "acl:as_to",
169
+ "8": "acl:at",
170
+ "9": "acl:att",
171
+ "10": "acl:before",
172
+ "11": "acl:behind",
173
+ "12": "acl:between",
174
+ "13": "acl:beyond",
175
+ "14": "acl:but",
176
+ "15": "acl:but_to",
177
+ "16": "acl:cleft",
178
+ "17": "acl:concerning",
179
+ "18": "acl:except_that",
180
+ "19": "acl:for",
181
+ "20": "acl:for_to",
182
+ "21": "acl:from",
183
+ "22": "acl:if",
184
+ "23": "acl:in",
185
+ "24": "acl:including",
186
+ "25": "acl:including_whether",
187
+ "26": "acl:inside",
188
+ "27": "acl:instead_of",
189
+ "28": "acl:into",
190
+ "29": "acl:like",
191
+ "30": "acl:med",
192
+ "31": "acl:mot",
193
+ "32": "acl:of",
194
+ "33": "acl:of_if",
195
+ "34": "acl:of_why",
196
+ "35": "acl:om",
197
+ "36": "acl:on",
198
+ "37": "acl:once",
199
+ "38": "acl:over",
200
+ "39": "acl:prior_to",
201
+ "40": "acl:p\u00e5",
202
+ "41": "acl:regarding",
203
+ "42": "acl:relcl",
204
+ "43": "acl:relcl:to",
205
+ "44": "acl:since",
206
+ "45": "acl:som",
207
+ "46": "acl:such_as",
208
+ "47": "acl:than",
209
+ "48": "acl:that",
210
+ "49": "acl:though",
211
+ "50": "acl:to",
212
+ "51": "acl:toward",
213
+ "52": "acl:towards",
214
+ "53": "acl:under",
215
+ "54": "acl:until",
216
+ "55": "acl:upon",
217
+ "56": "acl:when",
218
+ "57": "acl:where",
219
+ "58": "acl:whether",
220
+ "59": "acl:why",
221
+ "60": "acl:with",
222
+ "61": "acl:\u00e4n",
223
+ "62": "advcl",
224
+ "63": "advcl:about",
225
+ "64": "advcl:about_whether",
226
+ "65": "advcl:after",
227
+ "66": "advcl:against",
228
+ "67": "advcl:albeit",
229
+ "68": "advcl:along_with",
230
+ "69": "advcl:although",
231
+ "70": "advcl:as",
232
+ "71": "advcl:as_if",
233
+ "72": "advcl:as_in",
234
+ "73": "advcl:as_long_as",
235
+ "74": "advcl:as_soon_as",
236
+ "75": "advcl:as_though",
237
+ "76": "advcl:as_to",
238
+ "77": "advcl:as_well_as",
239
+ "78": "advcl:as_with",
240
+ "79": "advcl:at",
241
+ "80": "advcl:att",
242
+ "81": "advcl:because",
243
+ "82": "advcl:before",
244
+ "83": "advcl:behind",
245
+ "84": "advcl:besides",
246
+ "85": "advcl:between",
247
+ "86": "advcl:beyond",
248
+ "87": "advcl:but",
249
+ "88": "advcl:by",
250
+ "89": "advcl:cause",
251
+ "90": "advcl:despite",
252
+ "91": "advcl:due_to",
253
+ "92": "advcl:d\u00e4rf\u00f6r_att",
254
+ "93": "advcl:d\u00e5",
255
+ "94": "advcl:eftersom",
256
+ "95": "advcl:except",
257
+ "96": "advcl:except_for",
258
+ "97": "advcl:except_that",
259
+ "98": "advcl:for",
260
+ "99": "advcl:for_if",
261
+ "100": "advcl:for_to",
262
+ "101": "advcl:from",
263
+ "102": "advcl:f\u00f6r_att",
264
+ "103": "advcl:f\u00f6rutsatt_att",
265
+ "104": "advcl:given",
266
+ "105": "advcl:if",
267
+ "106": "advcl:if_to",
268
+ "107": "advcl:in",
269
+ "108": "advcl:in_between",
270
+ "109": "advcl:in_case",
271
+ "110": "advcl:in_order",
272
+ "111": "advcl:in_order_for",
273
+ "112": "advcl:in_order_to",
274
+ "113": "advcl:in_that",
275
+ "114": "advcl:including_by",
276
+ "115": "advcl:innan",
277
+ "116": "advcl:inside",
278
+ "117": "advcl:insofar_as",
279
+ "118": "advcl:instead_of",
280
+ "119": "advcl:into",
281
+ "120": "advcl:lest",
282
+ "121": "advcl:like",
283
+ "122": "advcl:liksom",
284
+ "123": "advcl:med_att",
285
+ "124": "advcl:n\u00e4r",
286
+ "125": "advcl:of",
287
+ "126": "advcl:of_whether",
288
+ "127": "advcl:om",
289
+ "128": "advcl:on",
290
+ "129": "advcl:on_whether",
291
+ "130": "advcl:once",
292
+ "131": "advcl:out",
293
+ "132": "advcl:over",
294
+ "133": "advcl:past",
295
+ "134": "advcl:prior_to",
296
+ "135": "advcl:provided",
297
+ "136": "advcl:p\u00e5",
298
+ "137": "advcl:rather_than",
299
+ "138": "advcl:relcl",
300
+ "139": "advcl:relcl:because",
301
+ "140": "advcl:samtidigt_som",
302
+ "141": "advcl:sedan",
303
+ "142": "advcl:since",
304
+ "143": "advcl:so",
305
+ "144": "advcl:so_as_to",
306
+ "145": "advcl:so_that",
307
+ "146": "advcl:som",
308
+ "147": "advcl:such_as",
309
+ "148": "advcl:than",
310
+ "149": "advcl:than_if",
311
+ "150": "advcl:that",
312
+ "151": "advcl:the",
313
+ "152": "advcl:though",
314
+ "153": "advcl:through",
315
+ "154": "advcl:till",
316
+ "155": "advcl:to",
317
+ "156": "advcl:toward",
318
+ "157": "advcl:towards",
319
+ "158": "advcl:under",
320
+ "159": "advcl:unless",
321
+ "160": "advcl:until",
322
+ "161": "advcl:upon",
323
+ "162": "advcl:when",
324
+ "163": "advcl:where",
325
+ "164": "advcl:whereas",
326
+ "165": "advcl:whether",
327
+ "166": "advcl:while",
328
+ "167": "advcl:whilst",
329
+ "168": "advcl:whither",
330
+ "169": "advcl:with",
331
+ "170": "advcl:without",
332
+ "171": "advcl:\u00e4n",
333
+ "172": "advmod",
334
+ "173": "amod",
335
+ "174": "appos",
336
+ "175": "aux",
337
+ "176": "aux:pass",
338
+ "177": "case",
339
+ "178": "case:of",
340
+ "179": "cc",
341
+ "180": "cc:preconj",
342
+ "181": "ccomp",
343
+ "182": "ccomp:whether",
344
+ "183": "compound",
345
+ "184": "compound:prt",
346
+ "185": "conj",
347
+ "186": "conj:and",
348
+ "187": "conj:and_or",
349
+ "188": "conj:and_yet",
350
+ "189": "conj:as_well_as",
351
+ "190": "conj:but",
352
+ "191": "conj:eller",
353
+ "192": "conj:et",
354
+ "193": "conj:fast",
355
+ "194": "conj:for",
356
+ "195": "conj:let_alone",
357
+ "196": "conj:men",
358
+ "197": "conj:minus",
359
+ "198": "conj:nor",
360
+ "199": "conj:not",
361
+ "200": "conj:not_to_mention",
362
+ "201": "conj:och",
363
+ "202": "conj:or",
364
+ "203": "conj:plus",
365
+ "204": "conj:plus_minus",
366
+ "205": "conj:rather_than",
367
+ "206": "conj:respektive",
368
+ "207": "conj:samt",
369
+ "208": "conj:slash",
370
+ "209": "conj:som",
371
+ "210": "conj:though",
372
+ "211": "conj:ty",
373
+ "212": "conj:utan",
374
+ "213": "conj:yet",
375
+ "214": "cop",
376
+ "215": "csubj",
377
+ "216": "csubj:outer",
378
+ "217": "csubj:pass",
379
+ "218": "csubj:xsubj",
380
+ "219": "dep",
381
+ "220": "det",
382
+ "221": "det:predet",
383
+ "222": "discourse",
384
+ "223": "dislocated",
385
+ "224": "expl",
386
+ "225": "fixed",
387
+ "226": "flat",
388
+ "227": "flat:foreign",
389
+ "228": "flat:name",
390
+ "229": "flatname",
391
+ "230": "goeswith",
392
+ "231": "iobj",
393
+ "232": "list",
394
+ "233": "mark",
395
+ "234": "nmod",
396
+ "235": "nmod:a_la",
397
+ "236": "nmod:aboard",
398
+ "237": "nmod:about",
399
+ "238": "nmod:above",
400
+ "239": "nmod:according_to",
401
+ "240": "nmod:across",
402
+ "241": "nmod:after",
403
+ "242": "nmod:against",
404
+ "243": "nmod:along",
405
+ "244": "nmod:alongside",
406
+ "245": "nmod:amidst",
407
+ "246": "nmod:among",
408
+ "247": "nmod:amongst",
409
+ "248": "nmod:around",
410
+ "249": "nmod:as",
411
+ "250": "nmod:as_for",
412
+ "251": "nmod:as_in",
413
+ "252": "nmod:as_opposed_to",
414
+ "253": "nmod:as_to",
415
+ "254": "nmod:astride",
416
+ "255": "nmod:at",
417
+ "256": "nmod:atop",
418
+ "257": "nmod:av",
419
+ "258": "nmod:barring",
420
+ "259": "nmod:because_of",
421
+ "260": "nmod:before",
422
+ "261": "nmod:behind",
423
+ "262": "nmod:below",
424
+ "263": "nmod:besides",
425
+ "264": "nmod:between",
426
+ "265": "nmod:beyond",
427
+ "266": "nmod:but",
428
+ "267": "nmod:by",
429
+ "268": "nmod:circa",
430
+ "269": "nmod:colon",
431
+ "270": "nmod:concerning",
432
+ "271": "nmod:desc",
433
+ "272": "nmod:despite",
434
+ "273": "nmod:down",
435
+ "274": "nmod:due_to",
436
+ "275": "nmod:during",
437
+ "276": "nmod:efter",
438
+ "277": "nmod:except",
439
+ "278": "nmod:except_for",
440
+ "279": "nmod:excluding",
441
+ "280": "nmod:following",
442
+ "281": "nmod:for",
443
+ "282": "nmod:from",
444
+ "283": "nmod:from_across",
445
+ "284": "nmod:from_below",
446
+ "285": "nmod:from_outside",
447
+ "286": "nmod:from_over",
448
+ "287": "nmod:fr\u00e5n",
449
+ "288": "nmod:f\u00f6r",
450
+ "289": "nmod:hos",
451
+ "290": "nmod:i",
452
+ "291": "nmod:in",
453
+ "292": "nmod:in_front_of",
454
+ "293": "nmod:include",
455
+ "294": "nmod:including",
456
+ "295": "nmod:inom",
457
+ "296": "nmod:inside",
458
+ "297": "nmod:instead_of",
459
+ "298": "nmod:into",
460
+ "299": "nmod:like",
461
+ "300": "nmod:med",
462
+ "301": "nmod:mellan",
463
+ "302": "nmod:minus",
464
+ "303": "nmod:mot",
465
+ "304": "nmod:near",
466
+ "305": "nmod:next_to",
467
+ "306": "nmod:npmod",
468
+ "307": "nmod:oavsett",
469
+ "308": "nmod:of",
470
+ "309": "nmod:off",
471
+ "310": "nmod:om",
472
+ "311": "nmod:on",
473
+ "312": "nmod:onto",
474
+ "313": "nmod:opposite",
475
+ "314": "nmod:other_than",
476
+ "315": "nmod:out",
477
+ "316": "nmod:out_of",
478
+ "317": "nmod:outside",
479
+ "318": "nmod:over",
480
+ "319": "nmod:past",
481
+ "320": "nmod:per",
482
+ "321": "nmod:plus",
483
+ "322": "nmod:poss",
484
+ "323": "nmod:post",
485
+ "324": "nmod:prior_to",
486
+ "325": "nmod:pro",
487
+ "326": "nmod:p\u00e5",
488
+ "327": "nmod:rather_than",
489
+ "328": "nmod:re",
490
+ "329": "nmod:regarding",
491
+ "330": "nmod:round",
492
+ "331": "nmod:save",
493
+ "332": "nmod:since",
494
+ "333": "nmod:slash",
495
+ "334": "nmod:such_as",
496
+ "335": "nmod:than",
497
+ "336": "nmod:through",
498
+ "337": "nmod:throughout",
499
+ "338": "nmod:thru",
500
+ "339": "nmod:till",
501
+ "340": "nmod:times",
502
+ "341": "nmod:tmod",
503
+ "342": "nmod:to",
504
+ "343": "nmod:toward",
505
+ "344": "nmod:towards",
506
+ "345": "nmod:under",
507
+ "346": "nmod:unlike",
508
+ "347": "nmod:unmarked",
509
+ "348": "nmod:until",
510
+ "349": "nmod:up",
511
+ "350": "nmod:up_to",
512
+ "351": "nmod:up_until",
513
+ "352": "nmod:upon",
514
+ "353": "nmod:utanf\u00f6r",
515
+ "354": "nmod:versus",
516
+ "355": "nmod:via",
517
+ "356": "nmod:vid",
518
+ "357": "nmod:whether",
519
+ "358": "nmod:with",
520
+ "359": "nmod:within",
521
+ "360": "nmod:without",
522
+ "361": "nmod:x",
523
+ "362": "nmod:\u00e5t",
524
+ "363": "nsubj",
525
+ "364": "nsubj:outer",
526
+ "365": "nsubj:pass",
527
+ "366": "nsubj:pass:xsubj",
528
+ "367": "nsubj:xsubj",
529
+ "368": "nummod",
530
+ "369": "nummod:gov",
531
+ "370": "obj",
532
+ "371": "obl",
533
+ "372": "obl:aboard",
534
+ "373": "obl:about",
535
+ "374": "obl:above",
536
+ "375": "obl:according_to",
537
+ "376": "obl:across",
538
+ "377": "obl:after",
539
+ "378": "obl:against",
540
+ "379": "obl:agent",
541
+ "380": "obl:along",
542
+ "381": "obl:along_with",
543
+ "382": "obl:alongside",
544
+ "383": "obl:amid",
545
+ "384": "obl:amidst",
546
+ "385": "obl:among",
547
+ "386": "obl:amongst",
548
+ "387": "obl:apart_from",
549
+ "388": "obl:around",
550
+ "389": "obl:as",
551
+ "390": "obl:as_for",
552
+ "391": "obl:as_in",
553
+ "392": "obl:as_of",
554
+ "393": "obl:as_opposed_to",
555
+ "394": "obl:as_to",
556
+ "395": "obl:aside",
557
+ "396": "obl:aside_from",
558
+ "397": "obl:at",
559
+ "398": "obl:atop",
560
+ "399": "obl:av",
561
+ "400": "obl:because_of",
562
+ "401": "obl:before",
563
+ "402": "obl:behind",
564
+ "403": "obl:below",
565
+ "404": "obl:beneath",
566
+ "405": "obl:beside",
567
+ "406": "obl:besides",
568
+ "407": "obl:between",
569
+ "408": "obl:beyond",
570
+ "409": "obl:bland",
571
+ "410": "obl:but",
572
+ "411": "obl:by",
573
+ "412": "obl:circa",
574
+ "413": "obl:concerning",
575
+ "414": "obl:depending",
576
+ "415": "obl:depending_on",
577
+ "416": "obl:depending_upon",
578
+ "417": "obl:despite",
579
+ "418": "obl:down",
580
+ "419": "obl:due_to",
581
+ "420": "obl:during",
582
+ "421": "obl:efter",
583
+ "422": "obl:enligt",
584
+ "423": "obl:except",
585
+ "424": "obl:except_for",
586
+ "425": "obl:excluding",
587
+ "426": "obl:following",
588
+ "427": "obl:for",
589
+ "428": "obl:for_post",
590
+ "429": "obl:from",
591
+ "430": "obl:from_across",
592
+ "431": "obl:from_among",
593
+ "432": "obl:from_behind",
594
+ "433": "obl:from_over",
595
+ "434": "obl:fr\u00e5n",
596
+ "435": "obl:f\u00f6r",
597
+ "436": "obl:genom",
598
+ "437": "obl:given",
599
+ "438": "obl:hos",
600
+ "439": "obl:i",
601
+ "440": "obl:in",
602
+ "441": "obl:in_between",
603
+ "442": "obl:in_case_of",
604
+ "443": "obl:in_front_of",
605
+ "444": "obl:in_lieu_of",
606
+ "445": "obl:in_to",
607
+ "446": "obl:including",
608
+ "447": "obl:including_before",
609
+ "448": "obl:including_for",
610
+ "449": "obl:including_in",
611
+ "450": "obl:inom",
612
+ "451": "obl:inside",
613
+ "452": "obl:instead_of",
614
+ "453": "obl:into",
615
+ "454": "obl:like",
616
+ "455": "obl:med",
617
+ "456": "obl:med_avseende_p\u00e5",
618
+ "457": "obl:mellan",
619
+ "458": "obl:minus",
620
+ "459": "obl:mot",
621
+ "460": "obl:near",
622
+ "461": "obl:nearby",
623
+ "462": "obl:nigh",
624
+ "463": "obl:notwithstanding",
625
+ "464": "obl:npmod",
626
+ "465": "obl:of",
627
+ "466": "obl:off",
628
+ "467": "obl:off_of",
629
+ "468": "obl:om",
630
+ "469": "obl:omkring",
631
+ "470": "obl:on",
632
+ "471": "obl:on_board",
633
+ "472": "obl:on_to",
634
+ "473": "obl:onto",
635
+ "474": "obl:opposite",
636
+ "475": "obl:other_than",
637
+ "476": "obl:out",
638
+ "477": "obl:out_of",
639
+ "478": "obl:outside",
640
+ "479": "obl:over",
641
+ "480": "obl:past",
642
+ "481": "obl:per",
643
+ "482": "obl:plus",
644
+ "483": "obl:post",
645
+ "484": "obl:prior_to",
646
+ "485": "obl:p\u00e5",
647
+ "486": "obl:rather_than",
648
+ "487": "obl:re",
649
+ "488": "obl:regarding",
650
+ "489": "obl:round",
651
+ "490": "obl:runtomkring",
652
+ "491": "obl:since",
653
+ "492": "obl:som",
654
+ "493": "obl:such_as",
655
+ "494": "obl:than",
656
+ "495": "obl:through",
657
+ "496": "obl:throughout",
658
+ "497": "obl:thru",
659
+ "498": "obl:till",
660
+ "499": "obl:tmod",
661
+ "500": "obl:to",
662
+ "501": "obl:to_before",
663
+ "502": "obl:toward",
664
+ "503": "obl:towards",
665
+ "504": "obl:trots",
666
+ "505": "obl:under",
667
+ "506": "obl:underneath",
668
+ "507": "obl:unlike",
669
+ "508": "obl:unmarked",
670
+ "509": "obl:until",
671
+ "510": "obl:unto",
672
+ "511": "obl:up",
673
+ "512": "obl:up_on",
674
+ "513": "obl:up_to",
675
+ "514": "obl:up_until",
676
+ "515": "obl:upon",
677
+ "516": "obl:ur",
678
+ "517": "obl:utan",
679
+ "518": "obl:utanf\u00f6r",
680
+ "519": "obl:versus",
681
+ "520": "obl:via",
682
+ "521": "obl:vid",
683
+ "522": "obl:with",
684
+ "523": "obl:within",
685
+ "524": "obl:without",
686
+ "525": "obl:\u00e4n",
687
+ "526": "obl:\u00e5",
688
+ "527": "obl:\u00e5t",
689
+ "528": "parataxis",
690
+ "529": "punct",
691
+ "530": "ref",
692
+ "531": "reparandum",
693
+ "532": "root",
694
+ "533": "vocative",
695
+ "534": "xcomp"
696
+ },
697
+ "joint_feats": {
698
+ "0": "ADJ#Adjective#Abbr=Yes",
699
+ "1": "ADJ#Adjective#Abbr=Yes|Degree=Pos",
700
+ "2": "ADJ#Adjective#Case=Nom|Definite=Def|Degree=Pos",
701
+ "3": "ADJ#Adjective#Case=Nom|Definite=Def|Degree=Pos|Gender=Com|Number=Sing",
702
+ "4": "ADJ#Adjective#Case=Nom|Definite=Def|Degree=Pos|Tense=Past|VerbForm=Part",
703
+ "5": "ADJ#Adjective#Case=Nom|Definite=Def|Degree=Sup",
704
+ "6": "ADJ#Adjective#Case=Nom|Definite=Ind|Degree=Pos",
705
+ "7": "ADJ#Adjective#Case=Nom|Definite=Ind|Degree=Pos|Gender=Com|Number=Sing",
706
+ "8": "ADJ#Adjective#Case=Nom|Definite=Ind|Degree=Pos|Gender=Com|Number=Sing|Tense=Past|VerbForm=Part",
707
+ "9": "ADJ#Adjective#Case=Nom|Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing",
708
+ "10": "ADJ#Adjective#Case=Nom|Definite=Ind|Degree=Pos|Number=Plur",
709
+ "11": "ADJ#Adjective#Case=Nom|Definite=Ind|Degree=Pos|Number=Sing",
710
+ "12": "ADJ#Adjective#Case=Nom|Degree=Cmp",
711
+ "13": "ADJ#Adjective#Case=Nom|Degree=Pos",
712
+ "14": "ADJ#Adjective#Case=Nom|Degree=Pos|Number=Plur",
713
+ "15": "ADJ#Adjective#Case=Nom|Degree=Pos|Tense=Pres|VerbForm=Part",
714
+ "16": "ADJ#Adjective#Case=Nom|Number=Plur|Tense=Past|VerbForm=Part",
715
+ "17": "ADJ#Adjective#Degree=Cmp",
716
+ "18": "ADJ#Adjective#Degree=Pos",
717
+ "19": "ADJ#Adjective#Degree=Pos|Foreign=Yes",
718
+ "20": "ADJ#Adjective#Degree=Sup",
719
+ "21": "ADJ#Adverb#Case=Nom|Definite=Ind|Degree=Pos|Gender=Com|Number=Sing",
720
+ "22": "ADJ#Adverb#Case=Nom|Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing",
721
+ "23": "ADJ#Adverb#Case=Nom|Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part",
722
+ "24": "ADJ#Adverb#Case=Nom|Definite=Ind|Degree=Pos|Number=Plur",
723
+ "25": "ADJ#Noun#Case=Nom|Definite=Def|Degree=Pos",
724
+ "26": "ADJ#Noun#Case=Nom|Degree=Pos",
725
+ "27": "ADJ#Numeral#Case=Nom|Definite=Def|Degree=Pos",
726
+ "28": "ADJ#Numeral#Case=Nom|NumType=Ord",
727
+ "29": "ADJ#Numeral#Degree=Pos|NumForm=Digit|NumType=Ord",
728
+ "30": "ADJ#Numeral#Degree=Pos|NumForm=Word|NumType=Ord",
729
+ "31": "ADJ#Prefixoid#_",
730
+ "32": "ADJ#Verb#Case=Nom|Definite=Def|Degree=Pos|Tense=Past|VerbForm=Part",
731
+ "33": "ADJ#Verb#Case=Nom|Definite=Ind|Degree=Pos|Gender=Com|Number=Sing|Tense=Past|VerbForm=Part",
732
+ "34": "ADJ#Verb#Case=Nom|Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part",
733
+ "35": "ADJ#Verb#Case=Nom|Definite=Ind|Degree=Pos|Number=Plur",
734
+ "36": "ADJ#Verb#Case=Nom|Definite=Ind|Degree=Pos|Number=Plur|Tense=Past|VerbForm=Part",
735
+ "37": "ADJ#Verb#Case=Nom|Definite=Ind|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part",
736
+ "38": "ADJ#Verb#Case=Nom|Degree=Pos|Tense=Pres|VerbForm=Part",
737
+ "39": "ADJ#_#Case=Nom|Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing",
738
+ "40": "ADJ#_#Case=Nom|Definite=Ind|Degree=Pos|Number=Plur",
739
+ "41": "ADJ#_#Case=Nom|Definite=Ind|Degree=Pos|Number=Sing|Tense=Past|VerbForm=Part",
740
+ "42": "ADJ#_#Case=Nom|Degree=Pos",
741
+ "43": "ADJ#_#Degree=Cmp",
742
+ "44": "ADJ#_#Degree=Pos",
743
+ "45": "ADJ#_#Degree=Pos|NumType=Ord",
744
+ "46": "ADJ#_#Degree=Sup",
745
+ "47": "ADJ#_#_",
746
+ "48": "ADP#Adjective#_",
747
+ "49": "ADP#Adverb#_",
748
+ "50": "ADP#Conjunction#_",
749
+ "51": "ADP#Preposition#_",
750
+ "52": "ADP#_#_",
751
+ "53": "ADV#Adjective#Degree=Pos",
752
+ "54": "ADV#Adjective#_",
753
+ "55": "ADV#Adverb#Abbr=Yes",
754
+ "56": "ADV#Adverb#Degree=Cmp",
755
+ "57": "ADV#Adverb#Degree=Pos",
756
+ "58": "ADV#Adverb#Degree=Pos|NumType=Mult",
757
+ "59": "ADV#Adverb#Degree=Sup",
758
+ "60": "ADV#Adverb#Degree=Sup|Polarity=Neg",
759
+ "61": "ADV#Adverb#NumType=Mult",
760
+ "62": "ADV#Adverb#Polarity=Neg",
761
+ "63": "ADV#Adverb#PronType=Dem",
762
+ "64": "ADV#Adverb#_",
763
+ "65": "ADV#Conjunction#_",
764
+ "66": "ADV#Invariable#Degree=Cmp",
765
+ "67": "ADV#Invariable#Degree=Sup",
766
+ "68": "ADV#Invariable#_",
767
+ "69": "ADV#Noun#_",
768
+ "70": "ADV#Prefixoid#_",
769
+ "71": "ADV#Pronoun#Definite=Ind|Gender=Neut|Number=Sing|PronType=Prs",
770
+ "72": "ADV#Pronoun#_",
771
+ "73": "ADV#_#Degree=Cmp",
772
+ "74": "ADV#_#Degree=Pos",
773
+ "75": "ADV#_#Degree=Sup",
774
+ "76": "ADV#_#NumType=Mult",
775
+ "77": "ADV#_#PronType=Dem",
776
+ "78": "ADV#_#PronType=Int",
777
+ "79": "ADV#_#_",
778
+ "80": "AUX#Verb#Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin",
779
+ "81": "AUX#Verb#Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin",
780
+ "82": "AUX#Verb#Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin",
781
+ "83": "AUX#Verb#Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin",
+ "84": "AUX#Verb#Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin",
+ "85": "AUX#Verb#Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin",
+ "86": "AUX#Verb#Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin",
+ "87": "AUX#Verb#Mood=Ind|Number=Sing|Person=2|Tense=Past|VerbForm=Fin",
+ "88": "AUX#Verb#Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin",
+ "89": "AUX#Verb#Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin",
+ "90": "AUX#Verb#Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin",
+ "91": "AUX#Verb#Mood=Ind|Tense=Past|VerbForm=Fin|Voice=Act",
+ "92": "AUX#Verb#Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act",
+ "93": "AUX#Verb#Mood=Sub|Number=Plur|Person=1|Tense=Past|VerbForm=Fin",
+ "94": "AUX#Verb#Mood=Sub|Number=Plur|Tense=Past|VerbForm=Part",
+ "95": "AUX#Verb#Number=Plur|Tense=Past|VerbForm=Part",
+ "96": "AUX#Verb#Number=Plur|Tense=Pres|VerbForm=Part",
+ "97": "AUX#Verb#VerbForm=Fin",
+ "98": "AUX#Verb#VerbForm=Ger",
+ "99": "AUX#Verb#VerbForm=Inf",
+ "100": "AUX#Verb#VerbForm=Inf|Voice=Act",
+ "101": "AUX#Verb#VerbForm=Sup|Voice=Act",
+ "102": "AUX#_#Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin",
+ "103": "CCONJ#Conjunction#_",
+ "104": "CCONJ#_#_",
+ "105": "DET#Adjective#Gender=Com|Number=Sing|PronType=Tot",
+ "106": "DET#Adjective#Gender=Neut|Number=Sing|PronType=Tot",
+ "107": "DET#Adjective#Number=Plur|PronType=Tot",
+ "108": "DET#Adjective#PronType=Tot",
+ "109": "DET#Article#Definite=Def|Gender=Com|Number=Sing|PronType=Art",
+ "110": "DET#Article#Definite=Def|Gender=Neut|Number=Sing|PronType=Art",
+ "111": "DET#Article#Definite=Def|Number=Plur|PronType=Art",
+ "112": "DET#Article#Definite=Def|PronType=Art",
+ "113": "DET#Article#Definite=Ind|Gender=Com|Number=Sing|PronType=Art",
+ "114": "DET#Article#Definite=Ind|Gender=Neut|Number=Sing|PronType=Art",
+ "115": "DET#Article#Definite=Ind|Gender=Neut|Number=Sing|PronType=Artt",
+ "116": "DET#Article#Definite=Ind|PronType=Art",
+ "117": "DET#Conjunction#Definite=Def|PronType=Art",
+ "118": "DET#Numeral#Definite=Ind|Gender=Neut|Number=Sing|PronType=Art",
+ "119": "DET#Prefixoid#_",
+ "120": "DET#Pronoun#Definite=Def|Gender=Com|Number=Sing|PronType=Art",
+ "121": "DET#Pronoun#Definite=Def|Gender=Com|Number=Sing|PronType=Dem",
+ "122": "DET#Pronoun#Definite=Def|Gender=Neut|Number=Sing|PronType=Art",
+ "123": "DET#Pronoun#Definite=Def|Gender=Neut|Number=Sing|PronType=Dem",
+ "124": "DET#Pronoun#Definite=Def|Number=Plur|PronType=Art",
+ "125": "DET#Pronoun#Definite=Def|Number=Plur|PronType=Dem",
+ "126": "DET#Pronoun#Definite=Def|Number=Plur|PronType=Tot",
+ "127": "DET#Pronoun#Definite=Ind|Gender=Com|Number=Sing|PronType=Ind",
+ "128": "DET#Pronoun#Definite=Ind|Gender=Com|Number=Sing|PronType=Int",
+ "129": "DET#Pronoun#Definite=Ind|Gender=Neut|Number=Sing|PronType=Ind",
+ "130": "DET#Pronoun#Definite=Ind|Gender=Neut|Number=Sing|PronType=Int",
+ "131": "DET#Pronoun#Definite=Ind|Gender=Neut|Number=Sing|PronType=Tot",
+ "132": "DET#Pronoun#Definite=Ind|Number=Plur|PronType=Ind",
+ "133": "DET#Pronoun#Definite=Ind|Number=Sing|PronType=Tot",
+ "134": "DET#Pronoun#Number=Plur|PronType=Dem",
+ "135": "DET#Pronoun#Number=Sing|PronType=Dem",
+ "136": "DET#Pronoun#Polarity=Neg",
+ "137": "DET#Pronoun#PronType=Ind",
+ "138": "DET#Pronoun#PronType=Int",
+ "139": "DET#Pronoun#PronType=Rel",
+ "140": "DET#Pronoun#PronType=Tot",
+ "141": "DET#Pronoun#_",
+ "142": "DET#_#Definite=Def|PronType=Art",
+ "143": "DET#_#Definite=EMPTY",
+ "144": "DET#_#Definite=Ind|PronType=Art",
+ "145": "DET#_#Gender=Neut|Number=Sing|PronType=Tot",
+ "146": "DET#_#Number=Sing|PronType=Dem",
+ "147": "DET#_#PronType=Int",
+ "148": "DET#_#PronType=Neg",
+ "149": "DET#_#PronType=Rcp",
+ "150": "DET#_#PronType=Tot",
+ "151": "DET#_#_",
+ "152": "INTJ#Interjection#_",
+ "153": "NOUN#Adverb#Number=Sing",
+ "154": "NOUN#Noun#Abbr=Yes",
+ "155": "NOUN#Noun#Abbr=Yes|Number=Plur",
+ "156": "NOUN#Noun#Abbr=Yes|Number=Sing",
+ "157": "NOUN#Noun#Case=Gen|Definite=Def|Gender=Com|Number=Plur",
+ "158": "NOUN#Noun#Case=Gen|Definite=Def|Gender=Com|Number=Sing",
+ "159": "NOUN#Noun#Case=Gen|Definite=Def|Gender=Neut|Number=Plur",
+ "160": "NOUN#Noun#Case=Gen|Definite=Def|Gender=Neut|Number=Sing",
+ "161": "NOUN#Noun#Case=Gen|Definite=Ind|Gender=Com|Number=Plur",
+ "162": "NOUN#Noun#Case=Gen|Definite=Ind|Gender=Neut|Number=Plur",
+ "163": "NOUN#Noun#Case=Gen|Definite=Ind|Gender=Neut|Number=Sing",
+ "164": "NOUN#Noun#Case=Nom|Definite=Def|Gender=Com|Number=Plur",
+ "165": "NOUN#Noun#Case=Nom|Definite=Def|Gender=Com|Number=Sing",
+ "166": "NOUN#Noun#Case=Nom|Definite=Def|Gender=Neut|Number=Plur",
+ "167": "NOUN#Noun#Case=Nom|Definite=Def|Gender=Neut|Number=Sing",
+ "168": "NOUN#Noun#Case=Nom|Definite=Ind|Gender=Com|Number=Plur",
+ "169": "NOUN#Noun#Case=Nom|Definite=Ind|Gender=Com|Number=Sing",
+ "170": "NOUN#Noun#Case=Nom|Definite=Ind|Gender=Neut|Number=Plur",
+ "171": "NOUN#Noun#Case=Nom|Definite=Ind|Gender=Neut|Number=Sing",
+ "172": "NOUN#Noun#Case=Nom|Definite=Ind|Gender=Neut|Number=Singg",
+ "173": "NOUN#Noun#Gender=Com",
+ "174": "NOUN#Noun#NumType=Frac|Number=Sing",
+ "175": "NOUN#Noun#Number=Plur",
+ "176": "NOUN#Noun#Number=Sing",
+ "177": "NOUN#Noun#Number=Sing|Polarity=Neg",
+ "178": "NOUN#Noun#VerbForm=Fin",
+ "179": "NOUN#Noun#_",
+ "180": "NOUN#Prefixoid#Number=Sing",
+ "181": "NOUN#Prefixoid#_",
+ "182": "NOUN#_#Case=Nom|Definite=Def|Gender=Com|Number=Sing",
+ "183": "NOUN#_#Case=Nom|Definite=Def|Gender=Neut|Number=Sing",
+ "184": "NOUN#_#Case=Nom|Definite=Ind|Gender=Com|Number=Sing",
+ "185": "NOUN#_#Case=Nom|Definite=Ind|Gender=Neut|Number=Sing",
+ "186": "NOUN#_#Number=Plur",
+ "187": "NOUN#_#Number=Sing",
+ "188": "NUM#Article#Case=Nom|Definite=Ind|Gender=Com|Number=Sing|NumType=Card",
+ "189": "NUM#Noun#Case=Nom|NumType=Card",
+ "190": "NUM#Noun#NumForm=Word|NumType=Card",
+ "191": "NUM#Numeral#Case=Nom|Definite=Ind|Gender=Com|Number=Sing|NumType=Card",
+ "192": "NUM#Numeral#Case=Nom|NumType=Card",
+ "193": "NUM#Numeral#NumForm=Digit|NumType=Card",
+ "194": "NUM#Numeral#NumForm=Digit|NumType=Frac",
+ "195": "NUM#Numeral#NumForm=Roman|NumType=Card",
+ "196": "NUM#Numeral#NumForm=Word|NumType=Card",
+ "197": "NUM#Numeral#NumType=Card",
+ "198": "NUM#Numeral#_",
+ "199": "NUM#_#Degree=Pos|NumType=Ord",
+ "200": "NUM#_#NumType=Card",
+ "201": "PART#Particle#Polarity=Neg",
+ "202": "PART#Particle#_",
+ "203": "PART#Preposition#_",
+ "204": "PART#_#Polarity=Neg",
+ "205": "PART#_#_",
+ "206": "PPROPN#_#Number=Plur",
+ "207": "PRON#Adjective#Definite=Ind|Number=Plur|PronType=Ind",
+ "208": "PRON#Adjective#Definite=Ind|Number=Plur|PronType=Tot",
+ "209": "PRON#Adverb#Definite=Def|Gender=Neut|Number=Sing|PronType=Prs",
+ "210": "PRON#Adverb#Definite=Ind|Gender=Neut|Number=Sing|PronType=Ind",
+ "211": "PRON#Adverb#_",
+ "212": "PRON#Article#Case=Nom|Definite=Def|Number=Plur|PronType=Prs",
+ "213": "PRON#Conjunction#Definite=Ind|Gender=Neut|Number=Sing|PronType=Int",
+ "214": "PRON#Conjunction#PronType=Rel",
+ "215": "PRON#Noun#Case=Nom|Definite=Ind|Gender=Com|Number=Sing|PronType=Ind",
+ "216": "PRON#Noun#Definite=Def|Gender=Com|Number=Sing|PronType=Prs",
+ "217": "PRON#Noun#Definite=Def|Number=Plur|PronType=Prs",
+ "218": "PRON#Noun#Definite=Ind|Number=Plur|PronType=Ind",
+ "219": "PRON#Numeral#Definite=Ind|Gender=Com|Number=Sing|PronType=Prs",
+ "220": "PRON#Numeral#Definite=Ind|Gender=Neut|Number=Sing|PronType=Prs",
+ "221": "PRON#Pronoun#Case=Acc|Definite=Def|Gender=Com|Number=Plur|PronType=Prs",
+ "222": "PRON#Pronoun#Case=Acc|Definite=Def|Gender=Com|Number=Sing|PronType=Prs",
+ "223": "PRON#Pronoun#Case=Acc|Definite=Def|Number=Plur|PronType=Prs",
+ "224": "PRON#Pronoun#Case=Acc|Definite=Def|PronType=Prs",
+ "225": "PRON#Pronoun#Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs",
+ "226": "PRON#Pronoun#Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs|Reflex=Yes",
+ "227": "PRON#Pronoun#Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs",
+ "228": "PRON#Pronoun#Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs|Reflex=Yes",
+ "229": "PRON#Pronoun#Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs",
+ "230": "PRON#Pronoun#Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs|Reflex=Yes",
+ "231": "PRON#Pronoun#Case=Acc|Number=Plur|Person=1|PronType=Prs",
+ "232": "PRON#Pronoun#Case=Acc|Number=Plur|Person=1|PronType=Prs|Reflex=Yes",
+ "233": "PRON#Pronoun#Case=Acc|Number=Plur|Person=2|PronType=Prs",
+ "234": "PRON#Pronoun#Case=Acc|Number=Plur|Person=3|PronType=Prs",
+ "235": "PRON#Pronoun#Case=Acc|Number=Plur|Person=3|PronType=Prs|Reflex=Yes",
+ "236": "PRON#Pronoun#Case=Acc|Number=Sing|Person=1|PronType=Prs",
+ "237": "PRON#Pronoun#Case=Acc|Number=Sing|Person=2|PronType=Prs",
+ "238": "PRON#Pronoun#Case=Acc|Number=Sing|Person=2|PronType=Prs|Reflex=Yes",
+ "239": "PRON#Pronoun#Case=Gen|Definite=Def|Gender=Com|Number=Sing|Poss=Yes|PronType=Prs",
+ "240": "PRON#Pronoun#Case=Gen|Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs",
+ "241": "PRON#Pronoun#Case=Gen|Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs",
+ "242": "PRON#Pronoun#Case=Gen|Gender=Neut|Number=Sing|Person=3|Poss=Yes|PronType=Prs",
+ "243": "PRON#Pronoun#Case=Gen|Number=Plur|Person=1|Poss=Yes|PronType=Prs",
+ "244": "PRON#Pronoun#Case=Gen|Number=Plur|Person=3|Poss=Yes|PronType=Prs",
+ "245": "PRON#Pronoun#Case=Gen|Number=Sing|Person=1|Poss=Yes|PronType=Prs",
+ "246": "PRON#Pronoun#Case=Gen|Number=Sing|Person=2|Poss=Yes|PronType=Prs",
+ "247": "PRON#Pronoun#Case=Nom|Definite=Def|Gender=Com|Number=Plur|PronType=Prs",
+ "248": "PRON#Pronoun#Case=Nom|Definite=Def|Gender=Com|Number=Sing|PronType=Prs",
+ "249": "PRON#Pronoun#Case=Nom|Definite=Def|Number=Plur|PronType=Prs",
+ "250": "PRON#Pronoun#Case=Nom|Definite=Ind|Gender=Com|Number=Sing|PronType=Ind",
+ "251": "PRON#Pronoun#Case=Nom|Definite=Ind|Gender=Com|Number=Sing|PronType=Rel",
+ "252": "PRON#Pronoun#Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs",
+ "253": "PRON#Pronoun#Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs",
+ "254": "PRON#Pronoun#Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs|Reflex=Yes",
+ "255": "PRON#Pronoun#Case=Nom|Gender=Neut|Number=Sing|Person=3|PronType=Prs",
+ "256": "PRON#Pronoun#Case=Nom|Gender=Neut|Number=Sing|Person=3|PronType=Prs|Reflex=Yes",
+ "257": "PRON#Pronoun#Case=Nom|Number=Plur|Person=1|PronType=Prs",
+ "258": "PRON#Pronoun#Case=Nom|Number=Plur|Person=2|PronType=Prs",
+ "259": "PRON#Pronoun#Case=Nom|Number=Plur|Person=3|PronType=Prs",
+ "260": "PRON#Pronoun#Case=Nom|Number=Plur|Person=3|PronType=Prs|Reflex=Yes",
+ "261": "PRON#Pronoun#Case=Nom|Number=Sing|Person=1|PronType=Prs",
+ "262": "PRON#Pronoun#Case=Nom|Number=Sing|Person=2|PronType=Prs",
+ "263": "PRON#Pronoun#Definite=Def|Gender=Com|Number=Sing|Poss=Yes|PronType=Prs",
+ "264": "PRON#Pronoun#Definite=Def|Gender=Com|Number=Sing|PronType=Prs",
+ "265": "PRON#Pronoun#Definite=Def|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs",
+ "266": "PRON#Pronoun#Definite=Def|Gender=Neut|Number=Sing|PronType=Dem",
+ "267": "PRON#Pronoun#Definite=Def|Gender=Neut|Number=Sing|PronType=Prs",
+ "268": "PRON#Pronoun#Definite=Def|Number=Plur|Poss=Yes|PronType=Prs",
+ "269": "PRON#Pronoun#Definite=Def|Number=Plur|PronType=Dem",
+ "270": "PRON#Pronoun#Definite=Def|Number=Plur|PronType=Prs",
+ "271": "PRON#Pronoun#Definite=Def|Poss=Yes|PronType=Prs",
+ "272": "PRON#Pronoun#Definite=Ind|Gender=Com|Number=Sing|PronType=Ind",
+ "273": "PRON#Pronoun#Definite=Ind|Gender=Neut|Number=Sing|PronType=Ind",
+ "274": "PRON#Pronoun#Definite=Ind|Gender=Neut|Number=Sing|PronType=Int",
+ "275": "PRON#Pronoun#Definite=Ind|Gender=Neut|Number=Sing|PronType=Neg",
+ "276": "PRON#Pronoun#Definite=Ind|Gender=Neut|Number=Sing|PronType=Prs",
+ "277": "PRON#Pronoun#Definite=Ind|Gender=Neut|Number=Sing|PronType=Rel",
+ "278": "PRON#Pronoun#Definite=Ind|Gender=Neut|Number=Sing|PronType=Tot",
+ "279": "PRON#Pronoun#Definite=Ind|Number=Plur|PronType=Rel",
+ "280": "PRON#Pronoun#Number=Plur",
+ "281": "PRON#Pronoun#Number=Plur|PronType=Dem",
+ "282": "PRON#Pronoun#Number=Plur|PronType=Tot",
+ "283": "PRON#Pronoun#Number=Sing",
+ "284": "PRON#Pronoun#Number=Sing|Polarity=Neg|PronType=Neg",
+ "285": "PRON#Pronoun#Number=Sing|PronType=Dem",
+ "286": "PRON#Pronoun#Number=Sing|PronType=Ind",
+ "287": "PRON#Pronoun#Number=Sing|PronType=Neg",
+ "288": "PRON#Pronoun#Number=Sing|Reflex=Yes",
+ "289": "PRON#Pronoun#PronType=Ind",
+ "290": "PRON#Pronoun#PronType=Int",
+ "291": "PRON#Pronoun#PronType=Rel",
+ "292": "PRON#Pronoun#_",
+ "293": "PRON#Verb#Definite=Def|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs",
+ "294": "PRON#_#Case=Acc|Definite=Def|PronType=Prs",
+ "295": "PRON#_#Definite=Ind|Gender=Neut|Number=Sing|PronType=Ind",
+ "296": "PRON#_#Definite=Ind|Gender=Neut|Number=Sing|PronType=Prs",
+ "297": "PRON#_#Gender=Neut|Number=Sing|Person=3|Poss=Yes|PronType=Prs",
+ "298": "PRON#_#Number=Sing",
+ "299": "PRON#_#Number=Sing|PronType=Dem",
+ "300": "PRON#_#Number=Sing|PronType=Ind",
+ "301": "PRON#_#PronType=Int",
+ "302": "PRON#_#PronType=Rel",
+ "303": "PROPN#Noun#Abbr=Yes|Number=Plur",
+ "304": "PROPN#Noun#Abbr=Yes|Number=Sing",
+ "305": "PROPN#Noun#Case=Gen",
+ "306": "PROPN#Noun#Case=Nom",
+ "307": "PROPN#Noun#Case=Nom|Definite=Ind|Gender=Com|Number=Sing",
+ "308": "PROPN#Noun#Number=Plur",
+ "309": "PROPN#Noun#Number=Sing",
+ "310": "PROPN#Noun#Number=Sing|Polarity=Neg",
+ "311": "PROPN#Noun#PronType=Dem",
+ "312": "PROPN#Noun#VerbForm=Fin",
+ "313": "PROPN#Prefixoid#Number=Sing",
+ "314": "PROPN#_#Abbr=Yes",
+ "315": "PROPN#_#Number=Plur",
+ "316": "PROPN#_#Number=Sing",
+ "317": "PUNCT#PUNCT#_",
+ "318": "PUNCT#_#_",
+ "319": "Prefixoid#Prefixoid#_",
+ "320": "SCONJ#Conjunction#_",
+ "321": "SCONJ#Preposition#_",
+ "322": "SCONJ#Pronoun#Definite=Ind|Gender=Neut|Number=Sing|PronType=Int",
+ "323": "SCONJ#_#_",
+ "324": "SYM#Conjunction#_",
+ "325": "SYM#Noun#Number=Sing",
+ "326": "SYM#Noun#_",
+ "327": "VERB#Adjective#Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass",
+ "328": "VERB#Verb#Case=Nom|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass",
+ "329": "VERB#Verb#Mood=Imp|VerbForm=Fin|Voice=Act",
+ "330": "VERB#Verb#Mood=Imp|VerbForm=Inf",
+ "331": "VERB#Verb#Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin",
+ "332": "VERB#Verb#Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin",
+ "333": "VERB#Verb#Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin",
+ "334": "VERB#Verb#Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin",
+ "335": "VERB#Verb#Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin",
+ "336": "VERB#Verb#Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin",
+ "337": "VERB#Verb#Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin",
+ "338": "VERB#Verb#Mood=Ind|Number=Sing|Person=2|Tense=Past|VerbForm=Fin",
+ "339": "VERB#Verb#Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin",
+ "340": "VERB#Verb#Mood=Ind|Number=Sing|Person=3|Polarity=Neg|Tense=Pres|VerbForm=Fin",
+ "341": "VERB#Verb#Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin",
+ "342": "VERB#Verb#Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin",
+ "343": "VERB#Verb#Mood=Ind|Tense=Past|VerbForm=Fin",
+ "344": "VERB#Verb#Mood=Ind|Tense=Past|VerbForm=Fin|Voice=Act",
+ "345": "VERB#Verb#Mood=Ind|Tense=Past|VerbForm=Fin|Voice=Pass",
+ "346": "VERB#Verb#Mood=Ind|Tense=Pres|VerbForm=Fin",
+ "347": "VERB#Verb#Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act",
+ "348": "VERB#Verb#Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Pass",
+ "349": "VERB#Verb#Mood=Sub|Number=Plur|Person=1|Tense=Past|VerbForm=Fin",
+ "350": "VERB#Verb#Mood=Sub|Tense=Past|VerbForm=Part",
+ "351": "VERB#Verb#Mood=Sub|Tense=Past|VerbForm=Part|Voice=Pass",
+ "352": "VERB#Verb#Mood=Sub|VerbForm=Inf",
+ "353": "VERB#Verb#Person=1|Tense=Past|VerbForm=Part",
+ "354": "VERB#Verb#Person=1|Tense=Past|VerbForm=Part|Voice=Pass",
+ "355": "VERB#Verb#Person=1|Tense=Pres|VerbForm=Ger",
+ "356": "VERB#Verb#Person=1|Tense=Pres|VerbForm=Inf",
+ "357": "VERB#Verb#Person=1|Tense=Pres|VerbForm=Part",
+ "358": "VERB#Verb#Person=2|Tense=Pres|VerbForm=Inf",
+ "359": "VERB#Verb#Tense=Past|VerbForm=Part",
+ "360": "VERB#Verb#Tense=Past|VerbForm=Part|Voice=Pass",
+ "361": "VERB#Verb#Tense=Pres|VerbForm=Part",
+ "362": "VERB#Verb#VerbForm=Fin",
+ "363": "VERB#Verb#VerbForm=Ger",
+ "364": "VERB#Verb#VerbForm=Inf",
+ "365": "VERB#Verb#VerbForm=Inf|Voice=Act",
+ "366": "VERB#Verb#VerbForm=Inf|Voice=Pass",
+ "367": "VERB#Verb#VerbForm=Sup",
+ "368": "VERB#Verb#VerbForm=Sup|Voice=Act",
+ "369": "VERB#Verb#VerbForm=Sup|Voice=Pass",
+ "370": "VERB#_#Mood=Ind|Tense=Past|VerbForm=Fin",
+ "371": "VERB#_#Tense=Past|VerbForm=Part",
+ "372": "VERB#_#VerbForm=Ger",
+ "373": "VERB#_#VerbForm=Inf",
+ "374": "X#_#Foreign=Yes",
+ "375": "X#_#Typo=Yes",
+ "376": "X#_#_",
+ "377": "X#_#foreign=Yes"
+ },
+ "lemma_rule": {
+ "0": "cut_prefix=0|cut_suffix=0|append_suffix=",
+ "1": "cut_prefix=0|cut_suffix=0|append_suffix='",
+ "2": "cut_prefix=0|cut_suffix=0|append_suffix=.",
+ "3": "cut_prefix=0|cut_suffix=0|append_suffix=a",
+ "4": "cut_prefix=0|cut_suffix=0|append_suffix=d",
+ "5": "cut_prefix=0|cut_suffix=0|append_suffix=e",
+ "6": "cut_prefix=0|cut_suffix=0|append_suffix=ma",
+ "7": "cut_prefix=0|cut_suffix=0|append_suffix=n",
+ "8": "cut_prefix=0|cut_suffix=0|append_suffix=o",
+ "9": "cut_prefix=0|cut_suffix=0|append_suffix=s",
+ "10": "cut_prefix=0|cut_suffix=0|append_suffix=t",
+ "11": "cut_prefix=0|cut_suffix=0|append_suffix=y",
+ "12": "cut_prefix=0|cut_suffix=11|append_suffix=#url",
+ "13": "cut_prefix=0|cut_suffix=12|append_suffix=#url",
+ "14": "cut_prefix=0|cut_suffix=14|append_suffix=#url",
+ "15": "cut_prefix=0|cut_suffix=1|append_suffix=",
+ "16": "cut_prefix=0|cut_suffix=1|append_suffix=a",
+ "17": "cut_prefix=0|cut_suffix=1|append_suffix=ad",
+ "18": "cut_prefix=0|cut_suffix=1|append_suffix=as",
+ "19": "cut_prefix=0|cut_suffix=1|append_suffix=be",
+ "20": "cut_prefix=0|cut_suffix=1|append_suffix=d",
+ "21": "cut_prefix=0|cut_suffix=1|append_suffix=e",
+ "22": "cut_prefix=0|cut_suffix=1|append_suffix=ed",
+ "23": "cut_prefix=0|cut_suffix=1|append_suffix=en",
+ "24": "cut_prefix=0|cut_suffix=1|append_suffix=et",
+ "25": "cut_prefix=0|cut_suffix=1|append_suffix=g",
+ "26": "cut_prefix=0|cut_suffix=1|append_suffix=ght",
+ "27": "cut_prefix=0|cut_suffix=1|append_suffix=have",
+ "28": "cut_prefix=0|cut_suffix=1|append_suffix=ill",
+ "29": "cut_prefix=0|cut_suffix=1|append_suffix=ja",
+ "30": "cut_prefix=0|cut_suffix=1|append_suffix=n",
+ "31": "cut_prefix=0|cut_suffix=1|append_suffix=na",
+ "32": "cut_prefix=0|cut_suffix=1|append_suffix=o",
+ "33": "cut_prefix=0|cut_suffix=1|append_suffix=ola",
+ "34": "cut_prefix=0|cut_suffix=1|append_suffix=on",
+ "35": "cut_prefix=0|cut_suffix=1|append_suffix=ot",
+ "36": "cut_prefix=0|cut_suffix=1|append_suffix=um",
+ "37": "cut_prefix=0|cut_suffix=1|append_suffix=ve",
+ "38": "cut_prefix=0|cut_suffix=1|append_suffix=y",
+ "39": "cut_prefix=0|cut_suffix=1|append_suffix=ym",
+ "40": "cut_prefix=0|cut_suffix=1|append_suffix=\u00e9",
+ "41": "cut_prefix=0|cut_suffix=1|append_suffix=\u014d",
+ "42": "cut_prefix=0|cut_suffix=20|append_suffix=",
+ "43": "cut_prefix=0|cut_suffix=2|append_suffix=",
+ "44": "cut_prefix=0|cut_suffix=2|append_suffix=$",
+ "45": "cut_prefix=0|cut_suffix=2|append_suffix=a",
+ "46": "cut_prefix=0|cut_suffix=2|append_suffix=an",
+ "47": "cut_prefix=0|cut_suffix=2|append_suffix=ara",
+ "48": "cut_prefix=0|cut_suffix=2|append_suffix=ave",
+ "49": "cut_prefix=0|cut_suffix=2|append_suffix=aw",
+ "50": "cut_prefix=0|cut_suffix=2|append_suffix=be",
+ "51": "cut_prefix=0|cut_suffix=2|append_suffix=dd",
+ "52": "cut_prefix=0|cut_suffix=2|append_suffix=e",
+ "53": "cut_prefix=0|cut_suffix=2|append_suffix=ee",
+ "54": "cut_prefix=0|cut_suffix=2|append_suffix=el",
+ "55": "cut_prefix=0|cut_suffix=2|append_suffix=en",
+ "56": "cut_prefix=0|cut_suffix=2|append_suffix=ep",
+ "57": "cut_prefix=0|cut_suffix=2|append_suffix=er",
+ "58": "cut_prefix=0|cut_suffix=2|append_suffix=et",
+ "59": "cut_prefix=0|cut_suffix=2|append_suffix=g",
+ "60": "cut_prefix=0|cut_suffix=2|append_suffix=have",
+ "61": "cut_prefix=0|cut_suffix=2|append_suffix=i",
+ "62": "cut_prefix=0|cut_suffix=2|append_suffix=ig",
+ "63": "cut_prefix=0|cut_suffix=2|append_suffix=igga",
+ "64": "cut_prefix=0|cut_suffix=2|append_suffix=in",
+ "65": "cut_prefix=0|cut_suffix=2|append_suffix=is",
+ "66": "cut_prefix=0|cut_suffix=2|append_suffix=it",
+ "67": "cut_prefix=0|cut_suffix=2|append_suffix=ja",
+ "68": "cut_prefix=0|cut_suffix=2|append_suffix=ke",
+ "69": "cut_prefix=0|cut_suffix=2|append_suffix=l",
+ "70": "cut_prefix=0|cut_suffix=2|append_suffix=mal",
+ "71": "cut_prefix=0|cut_suffix=2|append_suffix=n",
+ "72": "cut_prefix=0|cut_suffix=2|append_suffix=na",
+ "73": "cut_prefix=0|cut_suffix=2|append_suffix=ny",
+ "74": "cut_prefix=0|cut_suffix=2|append_suffix=o",
+ "75": "cut_prefix=0|cut_suffix=2|append_suffix=on",
+ "76": "cut_prefix=0|cut_suffix=2|append_suffix=ose",
+ "77": "cut_prefix=0|cut_suffix=2|append_suffix=ot",
+ "78": "cut_prefix=0|cut_suffix=2|append_suffix=ow",
+ "79": "cut_prefix=0|cut_suffix=2|append_suffix=u",
+ "80": "cut_prefix=0|cut_suffix=2|append_suffix=um",
+ "81": "cut_prefix=0|cut_suffix=2|append_suffix=un",
+ "82": "cut_prefix=0|cut_suffix=2|append_suffix=unna",
+ "83": "cut_prefix=0|cut_suffix=2|append_suffix=we",
+ "84": "cut_prefix=0|cut_suffix=2|append_suffix=y",
+ "85": "cut_prefix=0|cut_suffix=2|append_suffix=ycket",
+ "86": "cut_prefix=0|cut_suffix=2|append_suffix=yda",
+ "87": "cut_prefix=0|cut_suffix=2|append_suffix=yta",
+ "88": "cut_prefix=0|cut_suffix=2|append_suffix=\u00e5",
+ "89": "cut_prefix=0|cut_suffix=2|append_suffix=\u00e5ta",
+ "90": "cut_prefix=0|cut_suffix=2|append_suffix=\u00e8s",
+ "91": "cut_prefix=0|cut_suffix=2|append_suffix=\u00e9o",
+ "92": "cut_prefix=0|cut_suffix=3|append_suffix=",
+ "93": "cut_prefix=0|cut_suffix=3|append_suffix=-up",
+ "94": "cut_prefix=0|cut_suffix=3|append_suffix=a",
+ "95": "cut_prefix=0|cut_suffix=3|append_suffix=ake",
+ "96": "cut_prefix=0|cut_suffix=3|append_suffix=an",
+ "97": "cut_prefix=0|cut_suffix=3|append_suffix=and",
+ "98": "cut_prefix=0|cut_suffix=3|append_suffix=and_annat",
+ "99": "cut_prefix=0|cut_suffix=3|append_suffix=any",
+ "100": "cut_prefix=0|cut_suffix=3|append_suffix=as",
+ "101": "cut_prefix=0|cut_suffix=3|append_suffix=at",
+ "102": "cut_prefix=0|cut_suffix=3|append_suffix=be",
+ "103": "cut_prefix=0|cut_suffix=3|append_suffix=e",
+ "104": "cut_prefix=0|cut_suffix=3|append_suffix=eak",
+ "105": "cut_prefix=0|cut_suffix=3|append_suffix=eal",
+ "106": "cut_prefix=0|cut_suffix=3|append_suffix=ear",
+ "107": "cut_prefix=0|cut_suffix=3|append_suffix=ell",
+ "108": "cut_prefix=0|cut_suffix=3|append_suffix=er",
+ "109": "cut_prefix=0|cut_suffix=3|append_suffix=f",
+ "110": "cut_prefix=0|cut_suffix=3|append_suffix=fe",
+ "111": "cut_prefix=0|cut_suffix=3|append_suffix=i",
+ "112": "cut_prefix=0|cut_suffix=3|append_suffix=ick",
+ "113": "cut_prefix=0|cut_suffix=3|append_suffix=ike",
+ "114": "cut_prefix=0|cut_suffix=3|append_suffix=ine",
+ "115": "cut_prefix=0|cut_suffix=3|append_suffix=ink",
+ "116": "cut_prefix=0|cut_suffix=3|append_suffix=is",
+ "117": "cut_prefix=0|cut_suffix=3|append_suffix=ite",
+ "118": "cut_prefix=0|cut_suffix=3|append_suffix=ive",
+ "119": "cut_prefix=0|cut_suffix=3|append_suffix=jag",
+ "120": "cut_prefix=0|cut_suffix=3|append_suffix=liten",
+ "121": "cut_prefix=0|cut_suffix=3|append_suffix=m",
+ "122": "cut_prefix=0|cut_suffix=3|append_suffix=nan",
+ "123": "cut_prefix=0|cut_suffix=3|append_suffix=nna",
+ "124": "cut_prefix=0|cut_suffix=3|append_suffix=ola",
+ "125": "cut_prefix=0|cut_suffix=3|append_suffix=ome",
+ "126": "cut_prefix=0|cut_suffix=3|append_suffix=oot",
+ "127": "cut_prefix=0|cut_suffix=3|append_suffix=ose",
+ "128": "cut_prefix=0|cut_suffix=3|append_suffix=r",
+ "129": "cut_prefix=0|cut_suffix=3|append_suffix=ra",
+ "130": "cut_prefix=0|cut_suffix=3|append_suffix=sia",
+ "131": "cut_prefix=0|cut_suffix=3|append_suffix=uch",
+ "132": "cut_prefix=0|cut_suffix=3|append_suffix=vi",
+ "133": "cut_prefix=0|cut_suffix=3|append_suffix=y",
+ "134": "cut_prefix=0|cut_suffix=3|append_suffix=ycket",
+ "135": "cut_prefix=0|cut_suffix=3|append_suffix=ze",
+ "136": "cut_prefix=0|cut_suffix=3|append_suffix=\u00e4ga",
+ "137": "cut_prefix=0|cut_suffix=3|append_suffix=\u00e4gga",
+ "138": "cut_prefix=0|cut_suffix=3|append_suffix=\u00e5",
+ "139": "cut_prefix=0|cut_suffix=3|append_suffix=\u00e5_kallad",
+ "140": "cut_prefix=0|cut_suffix=3|append_suffix=\u00e8ne",
+ "141": "cut_prefix=0|cut_suffix=3|append_suffix=\u00e8re",
+ "142": "cut_prefix=0|cut_suffix=4|append_suffix=",
+ "143": "cut_prefix=0|cut_suffix=4|append_suffix=#url",
+ "144": "cut_prefix=0|cut_suffix=4|append_suffix=-up",
+ "145": "cut_prefix=0|cut_suffix=4|append_suffix=a",
+ "146": "cut_prefix=0|cut_suffix=4|append_suffix=ader",
+ "147": "cut_prefix=0|cut_suffix=4|append_suffix=all",
+ "148": "cut_prefix=0|cut_suffix=4|append_suffix=an",
+ "149": "cut_prefix=0|cut_suffix=4|append_suffix=ay",
+ "150": "cut_prefix=0|cut_suffix=4|append_suffix=e",
+ "151": "cut_prefix=0|cut_suffix=4|append_suffix=eak",
+ "152": "cut_prefix=0|cut_suffix=4|append_suffix=eal",
+ "153": "cut_prefix=0|cut_suffix=4|append_suffix=eeze",
+ "154": "cut_prefix=0|cut_suffix=4|append_suffix=go",
+ "155": "cut_prefix=0|cut_suffix=4|append_suffix=good",
+ "156": "cut_prefix=0|cut_suffix=4|append_suffix=ie",
+ "157": "cut_prefix=0|cut_suffix=4|append_suffix=ill",
+ "158": "cut_prefix=0|cut_suffix=4|append_suffix=ind",
+ "159": "cut_prefix=0|cut_suffix=4|append_suffix=ingly",
+ "160": "cut_prefix=0|cut_suffix=4|append_suffix=ke",
+ "161": "cut_prefix=0|cut_suffix=4|append_suffix=nment",
+ "162": "cut_prefix=0|cut_suffix=4|append_suffix=ola",
+ "163": "cut_prefix=0|cut_suffix=4|append_suffix=on",
+ "164": "cut_prefix=0|cut_suffix=4|append_suffix=or",
+ "165": "cut_prefix=0|cut_suffix=4|append_suffix=ot",
+ "166": "cut_prefix=0|cut_suffix=4|append_suffix=r",
+ "167": "cut_prefix=0|cut_suffix=4|append_suffix=ra",
+ "168": "cut_prefix=0|cut_suffix=4|append_suffix=t",
+ "169": "cut_prefix=0|cut_suffix=4|append_suffix=tch",
+ "170": "cut_prefix=0|cut_suffix=4|append_suffix=y",
+ "171": "cut_prefix=0|cut_suffix=4|append_suffix=\u00e5g",
+ "172": "cut_prefix=0|cut_suffix=4|append_suffix=\u00edtez",
+ "173": "cut_prefix=0|cut_suffix=4|append_suffix=\u00f6ra",
+ "174": "cut_prefix=0|cut_suffix=5|append_suffix=",
+ "175": "cut_prefix=0|cut_suffix=5|append_suffix=-chat",
+ "176": "cut_prefix=0|cut_suffix=5|append_suffix=a",
+ "177": "cut_prefix=0|cut_suffix=5|append_suffix=an",
+ "178": "cut_prefix=0|cut_suffix=5|append_suffix=bad",
+ "179": "cut_prefix=0|cut_suffix=5|append_suffix=badly",
+ "180": "cut_prefix=0|cut_suffix=5|append_suffix=be",
+ "181": "cut_prefix=0|cut_suffix=5|append_suffix=d\u00e5lig",
+ "182": "cut_prefix=0|cut_suffix=5|append_suffix=each",
+ "183": "cut_prefix=0|cut_suffix=5|append_suffix=ead",
+ "184": "cut_prefix=0|cut_suffix=5|append_suffix=eek",
+ "185": "cut_prefix=0|cut_suffix=5|append_suffix=er",
+ "186": "cut_prefix=0|cut_suffix=5|append_suffix=esto",
+ "187": "cut_prefix=0|cut_suffix=5|append_suffix=et",
+ "188": "cut_prefix=0|cut_suffix=5|append_suffix=etts",
+ "189": "cut_prefix=0|cut_suffix=5|append_suffix=g\u00e4rna",
+ "190": "cut_prefix=0|cut_suffix=5|append_suffix=he",
+ "191": "cut_prefix=0|cut_suffix=5|append_suffix=ician",
+ "192": "cut_prefix=0|cut_suffix=5|append_suffix=ill",
+ "193": "cut_prefix=0|cut_suffix=5|append_suffix=ing",
+ "194": "cut_prefix=0|cut_suffix=5|append_suffix=ink",
+ "195": "cut_prefix=0|cut_suffix=5|append_suffix=kick",
+ "196": "cut_prefix=0|cut_suffix=5|append_suffix=lation",
+ "197": "cut_prefix=0|cut_suffix=5|append_suffix=oder",
+ "198": "cut_prefix=0|cut_suffix=5|append_suffix=on",
+ "199": "cut_prefix=0|cut_suffix=5|append_suffix=r",
+ "200": "cut_prefix=0|cut_suffix=5|append_suffix=ra",
+ "201": "cut_prefix=0|cut_suffix=5|append_suffix=ry",
+ "202": "cut_prefix=0|cut_suffix=5|append_suffix=seek",
+ "203": "cut_prefix=0|cut_suffix=5|append_suffix=uy",
+ "204": "cut_prefix=0|cut_suffix=5|append_suffix=\u00e9r\u00e8se",
+ "205": "cut_prefix=0|cut_suffix=6|append_suffix=ar",
+ "206": "cut_prefix=0|cut_suffix=6|append_suffix=er",
+ "207": "cut_prefix=0|cut_suffix=6|append_suffix=good",
+ "208": "cut_prefix=0|cut_suffix=6|append_suffix=pany",
+ "209": "cut_prefix=0|cut_suffix=6|append_suffix=rule",
+ "210": "cut_prefix=0|cut_suffix=6|append_suffix=zation",
+ "211": "cut_prefix=0|cut_suffix=7|append_suffix=efine",
+ "212": "cut_prefix=0|cut_suffix=8|append_suffix=or",
+ "213": "cut_prefix=1|cut_suffix=0|append_suffix=",
+ "214": "cut_prefix=1|cut_suffix=0|append_suffix=a",
+ "215": "cut_prefix=1|cut_suffix=2|append_suffix=",
+ "216": "cut_prefix=1|cut_suffix=2|append_suffix=ll",
+ "217": "cut_prefix=1|cut_suffix=3|append_suffix=",
+ "218": "cut_prefix=1|cut_suffix=3|append_suffix=te",
+ "219": "cut_prefix=1|cut_suffix=4|append_suffix=ll",
+ "220": "cut_prefix=1|cut_suffix=6|append_suffix=url",
+ "221": "cut_prefix=2|cut_suffix=0|append_suffix=",
+ "222": "cut_prefix=2|cut_suffix=0|append_suffix=a",
+ "223": "cut_prefix=2|cut_suffix=1|append_suffix=",
+ "224": "cut_prefix=2|cut_suffix=1|append_suffix=empel",
+ "225": "cut_prefix=2|cut_suffix=1|append_suffix=n",
+ "226": "cut_prefix=2|cut_suffix=2|append_suffix=",
+ "227": "cut_prefix=2|cut_suffix=2|append_suffix=a",
+ "228": "cut_prefix=2|cut_suffix=3|append_suffix=",
+ "229": "cut_prefix=2|cut_suffix=3|append_suffix=as",
+ "230": "cut_prefix=2|cut_suffix=3|append_suffix=n",
+ "231": "cut_prefix=3|cut_suffix=0|append_suffix=",
+ "232": "cut_prefix=3|cut_suffix=1|append_suffix=",
+ "233": "cut_prefix=3|cut_suffix=1|append_suffix=e",
+ "234": "cut_prefix=3|cut_suffix=2|append_suffix=",
+ "235": "cut_prefix=4|cut_suffix=0|append_suffix=",
+ "236": "cut_prefix=4|cut_suffix=1|append_suffix=g",
+ "237": "cut_prefix=4|cut_suffix=20|append_suffix=rl",
+ "238": "cut_prefix=5|cut_suffix=0|append_suffix=",
+ "239": "cut_prefix=5|cut_suffix=4|append_suffix=",
+ "240": "cut_prefix=6|cut_suffix=0|append_suffix=",
+ "241": "cut_prefix=7|cut_suffix=0|append_suffix="
+ },
+ "misc": {
+ "0": "Cxn=rc-that-nsubj",
+ "1": "Cxn=rc-that-obj",
+ "2": "Cxn=rc-wh-nsubj",
+ "3": "Cxn=rc-wh-obl",
+ "4": "Cxn=rc-wh-obl-pfront",
+ "5": "Promoted=Yes|SpaceAfter=No",
+ "6": "SpaceAfter=No",
+ "7": "_",
+ "8": "ellipsis"
+ },
+ "semclass": {
+ "0": "ABILITY_OF_BEING",
+ "1": "ACCESSORY",
+ "2": "ACT",
+ "3": "ACTIVITY",
+ "4": "ACTIVITY_BY_INTEREST",
+ "5": "ADMINISTRATIVE_REGION",
+ "6": "ADVENTURE",
+ "7": "AGGREGATE",
+ "8": "AGGREGATE_OF_LIVING_OBJECTS",
+ "9": "AGGREGATE_OF_MACHINERY_OR_TRANSPORT",
+ "10": "AGGRESSIVE_ACTIONS",
+ "11": "AGREEMENT_VERBS",
+ "12": "AGRICULTURAL_PROCESSING",
+ "13": "AMBIENCE_ENVIRONMENT",
+ "14": "APPARATUS",
+ "15": "AREA_OF_HUMAN_ACTIVITY",
+ "16": "ARRANGEMENTS",
+ "17": "ARTEFACT",
+ "18": "ARTICLES",
+ "19": "ATTRIBUTIVE",
+ "20": "AUXILIARY_VERBS",
+ "21": "BAD_DANGEROUS_EVENT",
+ "22": "BE",
+ "23": "BEGIN_TO_TAKE_PLACE",
+ "24": "BEHAVIOUR",
+ "25": "BEING",
+ "26": "BEVERAGE",
+ "27": "BE_STATE",
+ "28": "BIJOUTERIE_AND_JEWELLERY",
+ "29": "BODY",
+ "30": "BOOM",
+ "31": "BUSINESS",
+ "32": "BUSY_FREE_OCCUPIED",
+ "33": "CARGO",
+ "34": "CHANGE_OF_MATTER_PHYSICAL_STATE",
+ "35": "CHANGE_OF_ORGANIC_OBJECTS",
+ "36": "CHANGE_OF_POST_AND_JOB",
+ "37": "CHARACTERISTIC_GENERAL",
+ "38": "CHEMICAL_CHANGES",
+ "39": "CHOOSING_SORTING",
+ "40": "CH_ABSTRACT_GENERALIZED",
+ "41": "CH_APPEARANCE",
+ "42": "CH_ASPECT",
+ "43": "CH_BENEFIT",
+ "44": "CH_BY_RESIDENCE",
+ "45": "CH_BY_SENSORY_PERCEPTION",
+ "46": "CH_BY_WORLD_OUTLOOK_EDUCATION_AESTHETIC",
+ "47": "CH_CLASSIFICATION",
+ "48": "CH_COMPOSITION",
+ "49": "CH_CONFIGURATION_AND_FORM",
+ "50": "CH_COVERING",
+ "51": "CH_CRIMINAL_ACTIVITY",
+ "52": "CH_DEGREE",
+ "53": "CH_DEGREE_AND_INTENSITY",
+ "54": "CH_DISPOSITION_AND_MOTION",
+ "55": "CH_DISTRIBUTION",
+ "56": "CH_EVALUATION",
+ "57": "CH_EVALUATION_OF_HUMAN_TEMPER_AND_ACTIVITY",
+ "58": "CH_FULLNESS",
+ "59": "CH_FUNCTIONING_OF_ENTITY",
+ "60": "CH_INFORMATION",
+ "61": "CH_INTENTION_CONCENTRATION",
+ "62": "CH_LANGUAGE",
+ "63": "CH_MAGNITUDE",
+ "64": "CH_MEASURE",
+ "65": "CH_OF_CONNECTIONS",
+ "66": "CH_OF_INTENSITY",
+ "67": "CH_OF_LOCATION",
+ "68": "CH_OF_VISUAL_AUDIBLE_REPRESENTATION",
+ "69": "CH_PARAMETER_OF_MATTER",
+ "70": "CH_PARAMETER_OF_OBJECT_AND_SUBSTANCE",
+ "71": "CH_PARAMETER_SPEED",
+ "72": "CH_PERCEPTIBILITY",
+ "73": "CH_PERSON_IDENTITY",
+ "74": "CH_PHYSICAL_STATE",
+ "75": "CH_POWER_AND_EFFECT",
+ "76": "CH_PRICE_AND_SUMS",
+ "77": "CH_REFERENCE_AND_QUANTIFICATION",
+ "78": "CH_RENOWN",
+ "79": "CH_RESISTANCE_TO_IMPACT",
+ "80": "CH_RHYTHM",
+ "81": "CH_SALIENCE",
+ "82": "CH_SCALE",
+ "83": "CH_SOCIAL_CHARACTERISTIC",
+ "84": "CH_SPHERE_OF_COVERAGE",
+ "85": "CH_STYLE",
+ "86": "CH_SURFACE_EDGE",
+ "87": "CH_SYSTEM_STRUCTURE",
+ "88": "CH_TYPE_OF_POSSESSION_AND_PARTICIPATION",
1422
+ "89": "CIRCUMSTANCE",
1423
+ "90": "CLASSIFICATION_TYPES",
1424
+ "91": "CLASSIFICATION_UNIT",
1425
+ "92": "CLOTHES",
1426
+ "93": "COGNITIVE_OBJECT",
1427
+ "94": "COMMUNICATIONS",
1428
+ "95": "COMPOSITE_PARTICLES",
1429
+ "96": "COMPOSITE_SUFFIXES",
1430
+ "97": "CONDITIONS_IN_NATURE",
1431
+ "98": "CONDITION_IN_ECONOMICS",
1432
+ "99": "CONDITION_OF_EXPERIENCER_AND_NATURE",
1433
+ "100": "CONDITION_SITUATION",
1434
+ "101": "CONDITION_STATE",
1435
+ "102": "CONFLICT_INTERACTION",
1436
+ "103": "CONJUNCTIONS",
1437
+ "104": "CONSTRUCTION_AS_WHOLE",
1438
+ "105": "CONTACT_VERBS",
1439
+ "106": "CONTACT_WITH_CONTRAGENT",
1440
+ "107": "CONTAINER",
1441
+ "108": "CONTAIN_INCLUDE_FORM",
1442
+ "109": "CONTINUE_TO_HAVE",
1443
+ "110": "CONTINUE_TO_TAKE_PLACE",
1444
+ "111": "COORDINATING_CONJUNCTIONS",
1445
+ "112": "CORRELATIVES",
1446
+ "113": "COSMOS_AND_COSMIC_OBJECTS",
1447
+ "114": "COST",
1448
+ "115": "COUNTRY_AS_ADMINISTRATIVE_UNIT",
1449
+ "116": "CREATION_VERBS",
1450
+ "117": "CREATIVE_WORK",
1451
+ "118": "CREATIVE_WORK_BY_GENRE",
1452
+ "119": "CRISIS",
1453
+ "120": "CULTURE",
1454
+ "121": "DECLINE",
1455
+ "122": "DECORATING_AND_FINISHING",
1456
+ "123": "DEFEND_SAVE",
1457
+ "124": "DEGREE_OF_FIT",
1458
+ "125": "DEGREE_OF_SIZE_OR_SCALE",
1459
+ "126": "DESTRUCTION_VERBS",
1460
+ "127": "DEVICE",
1461
+ "128": "DEVICE_FOR_ANIMALS",
1462
+ "129": "DEVICE_FOR_CLOSING_AND_LOCKING",
1463
+ "130": "DEVICE_FOR_HEATING",
1464
+ "131": "DEVICE_FOR_LIFTING_OBJECTS",
1465
+ "132": "DEVICE_FOR_MEASURING_AND_COUNTING",
1466
+ "133": "DIFFICULTIES",
1467
+ "134": "DIFFICULT_AND_EASY",
1468
+ "135": "DIMENSION",
1469
+ "136": "DIMENSIONS_CHAR",
1470
+ "137": "DISCOURSIVE_UNITS",
1471
+ "138": "DISTANT_CONTACT",
1472
+ "139": "DOCUMENT",
1473
+ "140": "DYNAMIC_ARTS",
1474
+ "141": "ECONOMIC_CHANGES",
1475
+ "142": "ECONOMY",
1476
+ "143": "EFFICIENCY_PRODUCTIVITY",
1477
+ "144": "ELECTIONS",
1478
+ "145": "EMBARGO",
1479
+ "146": "EMOTIONS_AND_THEIR_EXPRESSION",
1480
+ "147": "EMPTY_SUBJECT",
1481
+ "148": "ENDINGS",
1482
+ "149": "END_TO_TAKE_PLACE",
1483
+ "150": "ENGINEERING_COMMUNICATIONS",
1484
+ "151": "ENTITY_AS_RESULT_OF_ACTIVITY",
1485
+ "152": "ENTITY_BY_FUNCTION_AND_PROPERTY",
1486
+ "153": "ENTITY_BY_RELATION_TO_MAIN_PART",
1487
+ "154": "ENTITY_BY_VALUE",
1488
+ "155": "ENTITY_GENERAL",
1489
+ "156": "ENTITY_OR_SITUATION_PRONOUN",
1490
+ "157": "ETIQUETTE_COMMUNICATION",
1491
+ "158": "EVENT",
1492
+ "159": "EVERYDAY_PROCESSING",
1493
+ "160": "EXISTENCE_AND_POSSESSION",
1494
+ "161": "FACT_INCIDENT",
1495
+ "162": "FATE",
1496
+ "163": "FEELING_AS_CONDITION",
1497
+ "164": "FINE_ARTS_OBJECTS",
1498
+ "165": "FOOD",
1499
+ "166": "FORCE_IN_PHYSICS",
1500
+ "167": "FREQUENCY_CHAR",
1501
+ "168": "FURNISHINGS_AND_DECORATION",
1502
+ "169": "GENERAL_ACTION",
1503
+ "170": "GOOD_BAD_CONDITION",
1504
+ "171": "GRAMMATICAL_ELEMENTS",
1505
+ "172": "GROUP",
1506
+ "173": "HAVE_CLOTHING_ON",
1507
+ "174": "HERITAGE",
1508
+ "175": "HIERARCHICAL_VERBS",
1509
+ "176": "HISTORICAL_LOCALITY_BY_NAME",
1510
+ "177": "IDENTIFYING_ATTRIBUTE",
1511
+ "178": "IDIOMATICAL_ELEMENTS",
1512
+ "179": "INFORMATION",
1513
+ "180": "INFORMATION_BEARER",
1514
+ "181": "INFORMATION_COMMUNICATIONS",
1515
+ "182": "INHABITED_LOCALITY",
1516
+ "183": "INNOVATION",
1517
+ "184": "INSTRUMENT",
1518
+ "185": "INTELLECTUAL_ACTIVITY",
1519
+ "186": "INTERPERSONAL_RELATIONS",
1520
+ "187": "KIND",
1521
+ "188": "KITCHENWARE_AND_TABLEWARE",
1522
+ "189": "KNOWLEDGE",
1523
+ "190": "KNOWLEDGE_FROM_EXPERIENCE",
1524
+ "191": "KNOWLEDGE_FROM_EXPERIENCE_AND_DEDUCTION",
1525
+ "192": "LACK_AND_PLENTY",
1526
+ "193": "LAWS_AND_STANDARDS",
1527
+ "194": "LINES",
1528
+ "195": "LINE_FOR_COMMUNICATION",
1529
+ "196": "LINGUISTIC_OBJECTS",
1530
+ "197": "MAKE_EFFORTS",
1531
+ "198": "MANAGE_FAIL_CONDITION",
1532
+ "199": "MARKET_AS_AREA_OF_ACTIVITY",
1533
+ "200": "MATERIALITY_CHAR",
1534
+ "201": "MATHEMATICAL_OBJECTS",
1535
+ "202": "MEANING_SENSE",
1536
+ "203": "MEDICAL_OPERATIONS",
1537
+ "204": "MENTAL_OBJECT",
1538
+ "205": "METHOD_APPROACH_TECHNIQUE",
1539
+ "206": "MIX_AS_AGGREGATE",
1540
+ "207": "MODALITY",
1541
+ "208": "MODE_OF_EXPRESSIVENESS",
1542
+ "209": "MONEY",
1543
+ "210": "MOTION",
1544
+ "211": "MOTION_ACTIVITY",
1545
+ "212": "MOTIVATE",
1546
+ "213": "MOVEMENT_AS_ACTIVITY",
1547
+ "214": "MULTIMEDIA",
1548
+ "215": "MUSICAL_INSTRUMENT",
1549
+ "216": "MYSTERY_SECRET",
1550
+ "217": "NATURALNESS_GENUINENESS_CHAR",
1551
+ "218": "NETWORK",
1552
+ "219": "NONPRODUCTIVE_AREA",
1553
+ "220": "NORMATIVE_LEGAL_ACTIVITY",
1554
+ "221": "OBJECTS_BY_FORM_OF_MANIFESTATION",
1555
+ "222": "OBJECTS_BY_FUNCTION",
1556
+ "223": "OBJECT_BY_FUNCTION_AND_PROPERTY",
1557
+ "224": "OBJECT_BY_SHAPE",
1558
+ "225": "OBJECT_IN_NATURE",
1559
+ "226": "OCCUPATIONS",
1560
+ "227": "OPERATING_STATE",
1561
+ "228": "OPTICAL_DEVICE_AND_ITS_PARTS",
1562
+ "229": "ORDER_DISORDER",
1563
+ "230": "ORGANIC_NON_ORGANIC",
1564
+ "231": "ORGANIC_OBJECTS",
1565
+ "232": "ORGANIZATION",
1566
+ "233": "ORGANIZED_AGGREGATE",
1567
+ "234": "ORIENTATION_IN_SPACE",
1568
+ "235": "OUTFIT",
1569
+ "236": "PARTICLES",
1570
+ "237": "PART_OF_ARTEFACT",
1571
+ "238": "PART_OF_CLOTHES",
1572
+ "239": "PART_OF_CONSTRUCTION",
1573
+ "240": "PART_OF_CREATIVE_WORK",
1574
+ "241": "PART_OF_FOOTWEAR",
1575
+ "242": "PART_OF_ORGANISM",
1576
+ "243": "PART_OF_WORLD",
1577
+ "244": "PART_OR_PORTION_OF_ENTITY",
1578
+ "245": "PATH_AS_DIRECTION_OF_ACTIVITY",
1579
+ "246": "PEACE",
1580
+ "247": "PERCEPTION_ACTIVITY",
1581
+ "248": "PHENOMENON",
1582
+ "249": "PHRASAL_PARTICLES",
1583
+ "250": "PHYSICAL_AND_BIOLOGICAL_PROPERTIES",
1584
+ "251": "PHYSICAL_CHEMICAL_DAMAGE",
1585
+ "252": "PHYSICAL_OBJECT",
1586
+ "253": "PHYSICAL_OBJECT_AND_SUBSTANCE_CHAR",
1587
+ "254": "PHYSICAL_PSYCHIC_CONDITION",
1588
+ "255": "PHYSIOLOGICAL_PROCESSES",
1589
+ "256": "PLACE",
1590
+ "257": "PLANT",
1591
+ "258": "POINTS_AS_PLACE",
1592
+ "259": "POSITION_AS_STATUS",
1593
+ "260": "POSITION_IN_HIERARCHY",
1594
+ "261": "POSITION_IN_SPACE",
1595
+ "262": "POWER_CHAR",
1596
+ "263": "POWER_RIGHT",
1597
+ "264": "PREMISES",
1598
+ "265": "PREPOSITION",
1599
+ "266": "PRESSURE_CHAR",
1600
+ "267": "PROBLEMS_TO_SOLVE",
1601
+ "268": "PROCESSING",
1602
+ "269": "PROCESS_AND_ITS_STAGES",
1603
+ "270": "PROCESS_PARAMETER",
1604
+ "271": "PRODUCT",
1605
+ "272": "PRODUCTION_AS_TIME_ART",
1606
+ "273": "PRODUCTIVE_AREA",
1607
+ "274": "PUBLIC_ACTIVITY",
1608
+ "275": "PUBLIC_AND_POLITICAL_ACTIVITY",
1609
+ "276": "QUIETNESS",
1610
+ "277": "READINESS",
1611
+ "278": "REALITY",
1612
+ "279": "RELATIVE_ENTITY",
1613
+ "280": "RELATIVE_PART_OF_INHABITED_LOCALITY",
1614
+ "281": "RELATIVE_SPACE",
1615
+ "282": "RELIGIOUS_OBJECT",
1616
+ "283": "REMOVING_DESTRUCTION",
1617
+ "284": "RESERVE",
1618
+ "285": "RESULTS_OF_GIVING_INFORMATION_AND_SPEECH_ACTIVITY",
1619
+ "286": "RESULTS_OF_MAKING_DECISIONS",
1620
+ "287": "RESULTS_OF_MENTAL_ACTIVITY",
1621
+ "288": "RESULT_CONSEQUENCE",
1622
+ "289": "REVEAL_CONCEAL_INFORMATION",
1623
+ "290": "REWARD_AS_ENTITY",
1624
+ "291": "RISK_DANGER",
1625
+ "292": "SAMPLE_AS_AGGREGATE",
1626
+ "293": "SCALE_DIVISION",
1627
+ "294": "SCHEDULE_FOR_ACTIVITY",
1628
+ "295": "SCIENCE",
1629
+ "296": "SCIENTIFIC_AND_LITERARY_WORK",
1630
+ "297": "SEPARATION_PROCESSING",
1631
+ "298": "SERIES_IN_SCIENCE",
1632
+ "299": "SEXUAL_ACTIVITIES",
1633
+ "300": "SILENCE_AS_SOUNDLESSNESS",
1634
+ "301": "SITUATION",
1635
+ "302": "SOCIAL_CONDITIONS_OF_BEING",
1636
+ "303": "SPACE_AND_SPATIAL_OBJECTS",
1637
+ "304": "SPACE_BY_PARTICULAR_PROPERTIES",
1638
+ "305": "SPACE_BY_RELIGIOUS_BELIEFS",
1639
+ "306": "SPACE_TIME_ART",
1640
+ "307": "SPHERE_OF_ACTIVITY_GENERAL",
1641
+ "308": "SPORT",
1642
+ "309": "SPORT_DEVICE",
1643
+ "310": "STAGNATION",
1644
+ "311": "STATE_AREA",
1645
+ "312": "STATE_OF_MIND",
1646
+ "313": "STEADINESS_OF_FORM_OR_POSITION",
1647
+ "314": "STREET_OR_TOWN_SUFFIXES",
1648
+ "315": "SUBSTANCE",
1649
+ "316": "SURFACE_AND_ITS_SPECIALITIES",
1650
+ "317": "SYMBOLS_FOR_INFORMATION_TRANSFER",
1651
+ "318": "SYSTEM_AS_AGGREGATE",
1652
+ "319": "TEETH_AND_TONGUE_CONTACT",
1653
+ "320": "TEMPERATURE_CHAR",
1654
+ "321": "TENDENCY_AND_DISPOSITION",
1655
+ "322": "TERRITORY_AREA",
1656
+ "323": "TEST_FOR_EXPERIENCER",
1657
+ "324": "TEXTS_OF_PROGRAMS",
1658
+ "325": "TEXT_OBJECTS_AND_DOCUMENTS",
1659
+ "326": "TEXT_WITH_ADDRESSEE",
1660
+ "327": "THE_EARTH_AND_ITS_SPATIAL_PARTS",
1661
+ "328": "THE_GOOD_BAD",
1662
+ "329": "THE_MAGIC",
1663
+ "330": "TIME",
1664
+ "331": "TOPIC_SUBJECT",
1665
+ "332": "TOTALITY_OF_DEGREE",
1666
+ "333": "TO_ACCOMPANY_WITH",
1667
+ "334": "TO_ACCUSE_AND_VINDICATE",
1668
+ "335": "TO_ADAPT",
1669
+ "336": "TO_ADD",
1670
+ "337": "TO_ADJUST_AND_REPAIR",
1671
+ "338": "TO_AIM",
1672
+ "339": "TO_ANALYSE_AND_RESEARCH",
1673
+ "340": "TO_ANIMATE_PICTURE",
1674
+ "341": "TO_APPLAUD",
1675
+ "342": "TO_APPLY_COAT",
1676
+ "343": "TO_APPROACH_COME_TO_SOME_POINT_OR_STATE",
1677
+ "344": "TO_ARREST",
1678
+ "345": "TO_ASSEMBLE",
1679
+ "346": "TO_ATTRIBUTE_AS_TO_ADD",
1680
+ "347": "TO_AVOID",
1681
+ "348": "TO_BEAT_AND_PRICK",
1682
+ "349": "TO_BETRAY_AND_LEAVE",
1683
+ "350": "TO_BE_ABOUT_TO_HAPPEN",
1684
+ "351": "TO_BE_A_SIGN_OF",
1685
+ "352": "TO_BE_BASED",
1686
+ "353": "TO_BE_DESCENDED",
1687
+ "354": "TO_BE_GUIDED",
1688
+ "355": "TO_BE_SEEN_IN_FIELD_OF_VIEW",
1689
+ "356": "TO_BLOW_UP",
1690
+ "357": "TO_BREAK",
1691
+ "358": "TO_BUILD",
1692
+ "359": "TO_CALL_AND_DESIGNATE",
1693
+ "360": "TO_CANCEL",
1694
+ "361": "TO_CARE_AND_BRING_UP",
1695
+ "362": "TO_CAUSE_OR_STOP_MOVEMENT",
1696
+ "363": "TO_CAUSE_SUCCESS",
1697
+ "364": "TO_CELEBRATE",
1698
+ "365": "TO_CERTIFY",
1699
+ "366": "TO_CHALLENGE_TO_INVITE",
1700
+ "367": "TO_CHANGE",
1701
+ "368": "TO_CHANGE_FORM",
1702
+ "369": "TO_CHARACTERIZE",
1703
+ "370": "TO_CITE",
1704
+ "371": "TO_CLOSE",
1705
+ "372": "TO_COME_OR_TO_LEAVE_SPHERE_OF_ACTIVITY",
1706
+ "373": "TO_COMMENT",
1707
+ "374": "TO_COMMIT",
1708
+ "375": "TO_COMMUNICATE",
1709
+ "376": "TO_COMPEL_AND_EVOKE",
1710
+ "377": "TO_COMPEL_TO_ACCEPT",
1711
+ "378": "TO_COMPOSE_SYMBOLS",
1712
+ "379": "TO_CONCLUDE",
1713
+ "380": "TO_CONNIVE",
1714
+ "381": "TO_CONTRIBUTE_AND_HINDER",
1715
+ "382": "TO_CORRECT",
1716
+ "383": "TO_COUNT",
1717
+ "384": "TO_COURT_AND_FLIRT",
1718
+ "385": "TO_CREATE_HOLE",
1719
+ "386": "TO_DECIDE",
1720
+ "387": "TO_DESTINE",
1721
+ "388": "TO_DEVELOP",
1722
+ "389": "TO_DIG_PROCESS",
1723
+ "390": "TO_DIRECT_CREATIVE_WORK",
1724
+ "391": "TO_DISAPPEAR_LOSE_GET_RID_OF",
1725
+ "392": "TO_DISTRACT_DEFLECT",
1726
+ "393": "TO_DIVIDE",
1727
+ "394": "TO_ECONOMIZE",
1728
+ "395": "TO_EMIT",
1729
+ "396": "TO_EXIST",
1730
+ "397": "TO_FABRICATE",
1731
+ "398": "TO_FEEL_AND_EXPRESS_MENTAL_ATTITUDE_TO",
1732
+ "399": "TO_FLOW_IN_TIME",
1733
+ "400": "TO_FORGIVE",
1734
+ "401": "TO_FORM",
1735
+ "402": "TO_FORMULATE",
1736
+ "403": "TO_GENERATE",
1737
+ "404": "TO_GESTURE",
1738
+ "405": "TO_GET",
1739
+ "406": "TO_GET_INFORMATION",
1740
+ "407": "TO_GIVE",
1741
+ "408": "TO_GIVE_SIGNALS",
1742
+ "409": "TO_GO_ON_STRIKE",
1743
+ "410": "TO_GUESS",
1744
+ "411": "TO_HIDE",
1745
+ "412": "TO_HURRY_TO_TARRY",
1746
+ "413": "TO_INDEX",
1747
+ "414": "TO_INDUCE_PHYSICAL_PROPERTIES",
1748
+ "415": "TO_INTERACT",
1749
+ "416": "TO_INTERCHANGE",
1750
+ "417": "TO_INTERPRET",
1751
+ "418": "TO_INVENT",
1752
+ "419": "TO_INVOLVE",
1753
+ "420": "TO_JOIN",
1754
+ "421": "TO_JOIN_PHYSICAL_OBJECTS",
1755
+ "422": "TO_KEEP_VIOLATE_NORMS",
1756
+ "423": "TO_LEARN_AND_RESEARCH",
1757
+ "424": "TO_LET_DOWN",
1758
+ "425": "TO_LIQUIDATE",
1759
+ "426": "TO_MAKE",
1760
+ "427": "TO_MARRY_DIVORCE_ENGAGE",
1761
+ "428": "TO_MEAN",
1762
+ "429": "TO_MEASURE",
1763
+ "430": "TO_MIX",
1764
+ "431": "TO_MOVE_IN_GAMES",
1765
+ "432": "TO_OPEN",
1766
+ "433": "TO_ORGANIZE_EVENT",
1767
+ "434": "TO_OVERTHROW",
1768
+ "435": "TO_PARTICIPATE",
1769
+ "436": "TO_PERCEIVE",
1770
+ "437": "TO_PERFORM",
1771
+ "438": "TO_PERFORM_MATHS_OPERATIONS",
1772
+ "439": "TO_PERSUADE_SMB_TO_DO_SMTH",
1773
+ "440": "TO_PICKET",
1774
+ "441": "TO_PICTURE_DRAW",
1775
+ "442": "TO_PLAN_CREATIVE_AND_PHYSICAL_OBJECTS",
1776
+ "443": "TO_PLAY_GAMES",
1777
+ "444": "TO_POSSESS",
1778
+ "445": "TO_PRESS",
1779
+ "446": "TO_PRESS_AS_TOUCH",
1780
+ "447": "TO_PREVENT_SMTH",
1781
+ "448": "TO_PRINT_TEXT_PHOTO",
1782
+ "449": "TO_PROCESS_INFORMATION",
1783
+ "450": "TO_PROCESS_PHYSICAL_OBJECT",
1784
+ "451": "TO_PRODUCE_CERTAIN_SOUNDS",
1785
+ "452": "TO_PROGRAM",
1786
+ "453": "TO_PRONOUNCE",
1787
+ "454": "TO_PROPOSE",
1788
+ "455": "TO_PUNISH",
1789
+ "456": "TO_RATIFY",
1790
+ "457": "TO_REACT",
1791
+ "458": "TO_READ_READABLE",
1792
+ "459": "TO_REBEL",
1793
+ "460": "TO_RECEIVE_CALLERS",
1794
+ "461": "TO_REFLECT",
1795
+ "462": "TO_REGISTER",
1796
+ "463": "TO_REIGN_AS_TO_TAKE_PLACE",
1797
+ "464": "TO_RELEASE",
1798
+ "465": "TO_RESTORE",
1799
+ "466": "TO_REVENGE",
1800
+ "467": "TO_RUB_AND_SCRATCH",
1801
+ "468": "TO_SABOTAGE",
1802
+ "469": "TO_SCREEN",
1803
+ "470": "TO_SEDUCE",
1804
+ "471": "TO_SEEK_FIND",
1805
+ "472": "TO_SEND_TO_DELIVER",
1806
+ "473": "TO_SET",
1807
+ "474": "TO_SHARE",
1808
+ "475": "TO_SHINE",
1809
+ "476": "TO_SHOOT_PHOTO_OR_FILM",
1810
+ "477": "TO_SHOW",
1811
+ "478": "TO_SMOKE",
1812
+ "479": "TO_SOUND",
1813
+ "480": "TO_SPEND",
1814
+ "481": "TO_SPEND_INEFFECTIVELY",
1815
+ "482": "TO_SPEND_TIME",
1816
+ "483": "TO_SPOIL",
1817
+ "484": "TO_STOP_SPEAKING",
1818
+ "485": "TO_SUBSCRIBE",
1819
+ "486": "TO_SUBSTITUTE_AND_EXCHANGE",
1820
+ "487": "TO_SUMMARIZE",
1821
+ "488": "TO_SUPPORT_AND_OPPOSE",
1822
+ "489": "TO_SYMBOLIZE",
1823
+ "490": "TO_TAKE",
1824
+ "491": "TO_TAKE_FOOD_OR_MEDICINE",
1825
+ "492": "TO_TAKE_INTO_CONSIDERATION",
1826
+ "493": "TO_TAKE_PLACE_IN_NATURE",
1827
+ "494": "TO_TEASE_AND_JOKE",
1828
+ "495": "TO_TELEPHONE",
1829
+ "496": "TO_TERRORIZE",
1830
+ "497": "TO_THINK_ABOUT",
1831
+ "498": "TO_THINK_OUT",
1832
+ "499": "TO_TORTURE",
1833
+ "500": "TO_TOUCH",
1834
+ "501": "TO_TRADE",
1835
+ "502": "TO_TURN_INTO",
1836
+ "503": "TO_UNDERSTATE_TO_EXAGGERATE",
1837
+ "504": "TO_USE",
1838
+ "505": "TO_UTTER_ANIMAL_SOUNDS",
1839
+ "506": "TO_VISUALIZE",
1840
+ "507": "TO_WAIT",
1841
+ "508": "TO_WORK",
1842
+ "509": "TO_WRITE",
1843
+ "510": "TRANSPORT",
1844
+ "511": "TRANSPORT_COMMUNICATIONS",
1845
+ "512": "TRIAL",
1846
+ "513": "TRICK_MACHINATION",
1847
+ "514": "UNCERTAINTY",
1848
+ "515": "UNDERTAKING",
1849
+ "516": "UNIT_OF_INFORMATION_QUANTITY",
1850
+ "517": "UNKNOWN_SUBSTANTIVE_CLASS",
1851
+ "518": "URBAN_SPACE_AND_ROADS",
1852
+ "519": "VALUABLE",
1853
+ "520": "VERBAL_COMMUNICATION",
1854
+ "521": "VIOLENCE",
1855
+ "522": "VIRTUAL_OBJECT",
1856
+ "523": "VIRTUAL_TRANSFERENCE",
1857
+ "524": "VISUAL_CHARACTERISTICS",
1858
+ "525": "VISUAL_REPRESENTATION",
1859
+ "526": "WEAPON_AND_ITS_PART",
1860
+ "527": "WEIGHT_CHAR",
1861
+ "528": "WORLD_OUTLOOK",
1862
+ "529": "YES_NO_VERBS",
1863
+ "530": "_"
1864
+ },
1865
+ "ud_deprel": {
1866
+ "0": "acl",
1867
+ "1": "acl:cleft",
1868
+ "2": "acl:relcl",
1869
+ "3": "advcl",
1870
+ "4": "advcl:relcl",
1871
+ "5": "advmod",
1872
+ "6": "amod",
1873
+ "7": "appos",
1874
+ "8": "aux",
1875
+ "9": "aux:pass",
1876
+ "10": "case",
1877
+ "11": "cc",
1878
+ "12": "cc:preconj",
1879
+ "13": "ccomp",
1880
+ "14": "compound",
1881
+ "15": "compound:prt",
1882
+ "16": "conj",
1883
+ "17": "cop",
1884
+ "18": "csubj",
1885
+ "19": "csubj:outer",
1886
+ "20": "csubj:pass",
1887
+ "21": "dep",
1888
+ "22": "det",
1889
+ "23": "det:predet",
1890
+ "24": "discourse",
1891
+ "25": "dislocated",
1892
+ "26": "expl",
1893
+ "27": "fixed",
1894
+ "28": "flat",
1895
+ "29": "flat:foreign",
1896
+ "30": "flat:name",
1897
+ "31": "flatname",
1898
+ "32": "goeswith",
1899
+ "33": "iobj",
1900
+ "34": "list",
1901
+ "35": "mark",
1902
+ "36": "nmod",
1903
+ "37": "nmod:desc",
1904
+ "38": "nmod:npmod",
1905
+ "39": "nmod:poss",
1906
+ "40": "nmod:tmod",
1907
+ "41": "nmod:unmarked",
1908
+ "42": "nsubj",
1909
+ "43": "nsubj:outer",
1910
+ "44": "nsubj:pass",
1911
+ "45": "nummod",
1912
+ "46": "nummod:gov",
1913
+ "47": "obj",
1914
+ "48": "obl",
1915
+ "49": "obl:agent",
1916
+ "50": "obl:npmod",
1917
+ "51": "obl:tmod",
1918
+ "52": "obl:unmarked",
1919
+ "53": "orphan",
1920
+ "54": "parataxis",
1921
+ "55": "punct",
1922
+ "56": "reparandum",
1923
+ "57": "root",
1924
+ "58": "vocative",
1925
+ "59": "xcomp"
1926
+ }
1927
+ }
1928
+ }
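The `lemma` vocabulary above encodes lemmatization as edit-script rules such as `cut_prefix=0|cut_suffix=2|append_suffix=e`. A minimal sketch of applying such a rule, assuming the semantics implied by the field names (cut N characters from the front/back of the wordform, then append a suffix) — the exact semantics live in the parser code, so treat this as illustrative:

```python
def apply_lemma_rule(word: str, rule: str) -> str:
    """Apply an edit-script rule like 'cut_prefix=0|cut_suffix=2|append_suffix=e'
    to a wordform to recover its lemma (assumed semantics)."""
    params = dict(part.split("=", 1) for part in rule.split("|"))
    cut_prefix = int(params["cut_prefix"])
    cut_suffix = int(params["cut_suffix"])
    # Trim the affixes, then append the replacement suffix.
    stem = word[cut_prefix:len(word) - cut_suffix] if cut_suffix else word[cut_prefix:]
    return stem + params["append_suffix"]

print(apply_lemma_rule("walked", "cut_prefix=0|cut_suffix=2|append_suffix="))  # walk
```

Such rules let the lemma head be a plain classifier over a small rule inventory instead of a character-level generator.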
configuration.py ADDED
@@ -0,0 +1,40 @@
+ from transformers import PretrainedConfig
+
+
+ class CobaldParserConfig(PretrainedConfig):
+     model_type = "cobald_parser"
+
+     def __init__(
+         self,
+         encoder_model_name: str = None,
+         null_classifier_hidden_size: int = 0,
+         lemma_classifier_hidden_size: int = 0,
+         morphology_classifier_hidden_size: int = 0,
+         dependency_classifier_hidden_size: int = 0,
+         misc_classifier_hidden_size: int = 0,
+         deepslot_classifier_hidden_size: int = 0,
+         semclass_classifier_hidden_size: int = 0,
+         activation: str = 'relu',
+         dropout: float = 0.1,
+         consecutive_null_limit: int = 0,
+         vocabulary: dict[str, dict[int, str]] = None,
+         **kwargs
+     ):
+         self.encoder_model_name = encoder_model_name
+         self.null_classifier_hidden_size = null_classifier_hidden_size
+         self.consecutive_null_limit = consecutive_null_limit
+         self.lemma_classifier_hidden_size = lemma_classifier_hidden_size
+         self.morphology_classifier_hidden_size = morphology_classifier_hidden_size
+         self.dependency_classifier_hidden_size = dependency_classifier_hidden_size
+         self.misc_classifier_hidden_size = misc_classifier_hidden_size
+         self.deepslot_classifier_hidden_size = deepslot_classifier_hidden_size
+         self.semclass_classifier_hidden_size = semclass_classifier_hidden_size
+         self.activation = activation
+         self.dropout = dropout
+         # The serialized config stores mappings with string keys,
+         # e.g. {"0": "acl", "1": "conj"}, so we have to convert them back to int.
+         self.vocabulary = {
+             column: {int(k): v for k, v in labels.items()}
+             for column, labels in (vocabulary or {}).items()
+         }
+         super().__init__(**kwargs)
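The key conversion in `__init__` exists because JSON object keys are always strings. A quick round-trip illustration in plain Python (no transformers needed):

```python
import json

vocab = {"ud_deprel": {0: "acl", 1: "conj"}}
# Serializing and reloading turns the int keys into strings...
restored = json.loads(json.dumps(vocab))
# ...so they must be converted back, exactly as the config does:
fixed = {col: {int(k): v for k, v in labels.items()}
         for col, labels in restored.items()}
```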
dependency_classifier.py ADDED
@@ -0,0 +1,305 @@
1
+
2
+ from copy import deepcopy
3
+
4
+ import numpy as np
5
+
6
+ import torch
7
+ from torch import nn
8
+ from torch import Tensor, FloatTensor, BoolTensor, LongTensor
9
+ import torch.nn.functional as F
10
+
11
+ from transformers.activations import ACT2FN
12
+
13
+ from cobald_parser.bilinear_matrix_attention import BilinearMatrixAttention
14
+ from cobald_parser.chu_liu_edmonds import decode_mst
15
+ from cobald_parser.utils import pairwise_mask, replace_masked_values
16
+
17
+
18
+ class DependencyHeadBase(nn.Module):
19
+ """
20
+ Base class for scoring arcs and relations between tokens in a dependency tree/graph.
21
+ """
22
+
23
+ def __init__(self, hidden_size: int, n_rels: int):
24
+ super().__init__()
25
+
26
+ self.arc_attention = BilinearMatrixAttention(
27
+ hidden_size,
28
+ hidden_size,
29
+ use_input_biases=True,
30
+ n_labels=1
31
+ )
32
+ self.rel_attention = BilinearMatrixAttention(
33
+ hidden_size,
34
+ hidden_size,
35
+ use_input_biases=True,
36
+ n_labels=n_rels
37
+ )
38
+
39
+ def forward(
40
+ self,
41
+ h_arc_head: Tensor, # [batch_size, seq_len, hidden_size]
42
+ h_arc_dep: Tensor, # ...
43
+ h_rel_head: Tensor, # ...
44
+ h_rel_dep: Tensor, # ...
45
+ gold_arcs: LongTensor, # [batch_size, seq_len, seq_len]
46
+ null_mask: BoolTensor, # [batch_size, seq_len]
47
+ padding_mask: BoolTensor # [batch_size, seq_len]
48
+ ) -> dict[str, Tensor]:
49
+
50
+ # Score arcs.
51
+ # s_arc[:, i, j] = score of edge i -> j.
52
+ s_arc = self.arc_attention(h_arc_head, h_arc_dep)
53
+ # Mask undesirable values (padding, nulls, etc.) with a large negative value.
54
+ mask2d = pairwise_mask(null_mask & padding_mask)
55
+ replace_masked_values(s_arc, mask2d, replace_with=-1e8)
56
+ # Score arcs' relations.
57
+ # [batch_size, seq_len, seq_len, num_labels]
58
+ s_rel = self.rel_attention(h_rel_head, h_rel_dep).permute(0, 2, 3, 1)
59
+
60
+ # Calculate loss.
61
+ loss = 0.0
62
+ if gold_arcs is not None:
63
+ loss += self.calc_arc_loss(s_arc, gold_arcs)
64
+ loss += self.calc_rel_loss(s_rel, gold_arcs)
65
+
66
+ # Predict arcs based on the scores.
67
+ # [batch_size, seq_len, seq_len]
68
+ pred_arcs_matrix = self.predict_arcs(s_arc, null_mask, padding_mask)
69
+ # [batch_size, seq_len, seq_len]
70
+ pred_rels_matrix = self.predict_rels(s_rel)
71
+ # [n_pred_arcs, 4]
72
+ preds_combined = self.combine_arcs_rels(pred_arcs_matrix, pred_rels_matrix)
73
+ return {
74
+ 'preds': preds_combined,
75
+ 'loss': loss
76
+ }
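The masking step above relies on the repo-internal `pairwise_mask` helper. Its assumed behaviour — an arc (i, j) is valid only if both endpoint tokens are valid — can be sketched in NumPy:

```python
import numpy as np

def pairwise_mask_np(mask_1d: np.ndarray) -> np.ndarray:
    """Assumed semantics of pairwise_mask: lift a [batch, seq_len] token mask
    to a [batch, seq_len, seq_len] arc mask where arc (i, j) is valid only
    when both token i and token j are valid."""
    return mask_1d[:, :, None] & mask_1d[:, None, :]

mask = np.array([[True, True, False]])  # last token is padding
arc_mask = pairwise_mask_np(mask)
```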
77
+
78
+ @staticmethod
79
+ def calc_arc_loss(
80
+ s_arc: Tensor, # [batch_size, seq_len, seq_len]
81
+ gold_arcs: LongTensor # [n_arcs, 4]
82
+ ) -> Tensor:
83
+ """Calculate arc loss."""
84
+ raise NotImplementedError
85
+
86
+ @staticmethod
87
+ def calc_rel_loss(
88
+ s_rel: Tensor, # [batch_size, seq_len, seq_len, num_labels]
89
+ gold_arcs: LongTensor # [n_arcs, 4]
90
+ ) -> Tensor:
91
+ batch_idxs, arcs_from, arcs_to, rels = gold_arcs.T
92
+ return F.cross_entropy(s_rel[batch_idxs, arcs_from, arcs_to], rels)
93
+
94
+ def predict_arcs(
95
+ self,
96
+ s_arc: Tensor, # [batch_size, seq_len, seq_len]
97
+ null_mask: BoolTensor, # [batch_size, seq_len]
98
+ padding_mask: BoolTensor # [batch_size, seq_len]
99
+ ) -> LongTensor:
100
+ """Predict arcs from scores."""
101
+ raise NotImplementedError
102
+
103
+ def predict_rels(
104
+ self,
105
+ s_rel: FloatTensor
106
+ ) -> LongTensor:
107
+ return s_rel.argmax(dim=-1).long()
108
+
109
+ @staticmethod
110
+ def combine_arcs_rels(
111
+ pred_arcs: LongTensor,
112
+ pred_rels: LongTensor
113
+ ) -> LongTensor:
114
+ """Select relations towards predicted arcs."""
115
+ assert pred_arcs.shape == pred_rels.shape
116
+ # Get indices where arcs exist
117
+ indices = pred_arcs.nonzero(as_tuple=True)
118
+ batch_idxs, from_idxs, to_idxs = indices
119
+ # Get corresponding relation types
120
+ rel_types = pred_rels[batch_idxs, from_idxs, to_idxs]
121
+ # Stack as [batch_idx, from_idx, to_idx, rel_type]
122
+ return torch.stack([batch_idxs, from_idxs, to_idxs, rel_types], dim=1)
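`combine_arcs_rels` turns a dense arc adjacency matrix plus a relation matrix into a sparse list of labelled arcs. The same logic in a self-contained NumPy analogue:

```python
import numpy as np

def combine_arcs_rels_np(pred_arcs: np.ndarray, pred_rels: np.ndarray) -> np.ndarray:
    """For every predicted arc (a 1 in the adjacency matrix), pick its relation id
    and emit a row [batch_idx, from_idx, to_idx, rel_type]."""
    batch_idxs, from_idxs, to_idxs = pred_arcs.nonzero()
    rel_types = pred_rels[batch_idxs, from_idxs, to_idxs]
    return np.stack([batch_idxs, from_idxs, to_idxs, rel_types], axis=1)

arcs = np.zeros((1, 3, 3), dtype=np.int64)
arcs[0, 0, 1] = 1  # arc 0 -> 1
arcs[0, 1, 2] = 1  # arc 1 -> 2
rels = np.zeros((1, 3, 3), dtype=np.int64)
rels[0, 0, 1] = 5
rels[0, 1, 2] = 7
```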
123
+
124
+
125
+ class DependencyHead(DependencyHeadBase):
126
+ """
127
+ Basic UD syntax specialization that predicts a single head for each token.
128
+ """
129
+
130
+
131
+ def predict_arcs(
132
+ self,
133
+ s_arc: Tensor, # [batch_size, seq_len, seq_len]
134
+ null_mask: BoolTensor, # [batch_size, seq_len]
135
+ padding_mask: BoolTensor # [batch_size, seq_len]
136
+ ) -> Tensor:
137
+
138
+ if self.training:
139
+ # During training, use fast greedy decoding.
140
+ # - [batch_size, seq_len]
141
+ pred_arcs_seq = s_arc.argmax(dim=1)
142
+ else:
143
+ # FIXME
144
+ # During inference, decode Maximum Spanning Tree.
145
+ # pred_arcs_seq = self._mst_decode(s_arc, padding_mask)
146
+ pred_arcs_seq = s_arc.argmax(dim=1)
147
+
148
+ # Upscale arcs sequence of shape [batch_size, seq_len]
149
+ # to matrix of shape [batch_size, seq_len, seq_len].
150
+ pred_arcs = F.one_hot(pred_arcs_seq, num_classes=pred_arcs_seq.size(1)).long().transpose(1, 2)
151
+ # Apply mask one more time (even though s_arc is already masked),
152
+ # because argmax erases information about masked values.
153
+ mask2d = pairwise_mask(null_mask & padding_mask)
154
+ replace_masked_values(pred_arcs, mask2d, replace_with=0)
155
+ return pred_arcs
156
+
157
+ def _mst_decode(
158
+ self,
159
+ s_arc: Tensor, # [batch_size, seq_len, seq_len]
160
+ padding_mask: Tensor
161
+ ) -> tuple[Tensor, Tensor]:
162
+
163
+ batch_size = s_arc.size(0)
164
+ device = s_arc.device
165
+ s_arc = s_arc.cpu()
166
+
167
+ # Convert scores to probabilities, as `decode_mst` expects non-negative values.
168
+ arc_probs = nn.functional.softmax(s_arc, dim=1)
169
+
170
+ # `decode_mst` knows nothing about UD and ROOT, so we have to manually
171
+ # zero probabilities of arcs leading to ROOT to make sure ROOT is a source node
172
+ # of a graph.
173
+
174
+ # Decode ROOT positions from diagonals.
175
+ # shape: [batch_size]
176
+ root_idxs = arc_probs.diagonal(dim1=1, dim2=2).argmax(dim=-1)
177
+ # Zero out arcs leading to ROOTs.
178
+ arc_probs[torch.arange(batch_size), :, root_idxs] = 0.0
179
+
180
+ pred_arcs = []
181
+ for sample_idx in range(batch_size):
182
+ energy = arc_probs[sample_idx]
183
+ length = padding_mask[sample_idx].sum()
184
+ heads = decode_mst(energy, length)
185
+ # Some nodes may be isolated. Pick heads greedily in this case.
186
+ heads[heads <= 0] = s_arc[sample_idx].argmax(dim=1)[heads <= 0]
187
+ pred_arcs.append(heads)
188
+
189
+ # shape: [batch_size, seq_len]
190
+ pred_arcs = torch.from_numpy(np.stack(pred_arcs)).long().to(device)
191
+ return pred_arcs
192
+
193
+ @staticmethod
+ def calc_arc_loss(
+ s_arc: Tensor, # [batch_size, seq_len, seq_len]
+ gold_arcs: LongTensor # [n_arcs, 4]
+ ) -> Tensor:
+ batch_idxs, from_idxs, to_idxs, _ = gold_arcs.T
+ return F.cross_entropy(s_arc[batch_idxs, :, to_idxs], from_idxs)
201
+
202
+
203
+ class MultiDependencyHead(DependencyHeadBase):
204
+ """
205
+ Enhanced UD syntax specialization that predicts multiple edges for each token.
206
+ """
207
+
208
+
209
+ def predict_arcs(
210
+ self,
211
+ s_arc: Tensor, # [batch_size, seq_len, seq_len]
212
+ null_mask: BoolTensor, # [batch_size, seq_len]
213
+ padding_mask: BoolTensor # [batch_size, seq_len]
214
+ ) -> Tensor:
215
+ # Convert scores to probabilities.
216
+ arc_probs = torch.sigmoid(s_arc)
217
+ # Find confident arcs (with prob > 0.5).
218
+ return arc_probs.round().long()
219
+
220
+ @staticmethod
+ def calc_arc_loss(
+ s_arc: Tensor, # [batch_size, seq_len, seq_len]
+ gold_arcs: LongTensor # [n_arcs, 4]
+ ) -> Tensor:
+ batch_idxs, from_idxs, to_idxs, _ = gold_arcs.T
+ # Gold arcs as a matrix, where matrix[i, arc_from, arc_to] = 1.0 if the arc is present.
+ gold_arcs_matrix = torch.zeros_like(s_arc)
+ gold_arcs_matrix[batch_idxs, from_idxs, to_idxs] = 1.0
+ # Padded arcs' logits are large negative values that do not contribute to the loss.
+ return F.binary_cross_entropy_with_logits(s_arc, gold_arcs_matrix)
232
+
233
+
234
+ class DependencyClassifier(nn.Module):
235
+ """
236
+ Dozat and Manning's biaffine dependency classifier.
237
+ """
238
+
239
+ def __init__(
240
+ self,
241
+ input_size: int,
242
+ hidden_size: int,
243
+ n_rels_ud: int,
244
+ n_rels_eud: int,
245
+ activation: str,
246
+ dropout: float,
247
+ ):
248
+ super().__init__()
249
+
250
+ self.arc_dep_mlp = nn.Sequential(
251
+ nn.Dropout(dropout),
252
+ nn.Linear(input_size, hidden_size),
253
+ ACT2FN[activation],
254
+ nn.Dropout(dropout)
255
+ )
256
+ # All mlps are equal.
257
+ self.arc_head_mlp = deepcopy(self.arc_dep_mlp)
258
+ self.rel_dep_mlp = deepcopy(self.arc_dep_mlp)
259
+ self.rel_head_mlp = deepcopy(self.arc_dep_mlp)
260
+
261
+ self.dependency_head_ud = DependencyHead(hidden_size, n_rels_ud)
262
+ self.dependency_head_eud = MultiDependencyHead(hidden_size, n_rels_eud)
263
+
264
+ def forward(
265
+ self,
266
+ embeddings: Tensor, # [batch_size, seq_len, embedding_size]
267
+ gold_ud: Tensor, # [n_ud_arcs, 4]
268
+ gold_eud: Tensor, # [n_eud_arcs, 4]
269
+ null_mask: Tensor, # [batch_size, seq_len]
270
+ padding_mask: Tensor # [batch_size, seq_len]
271
+ ) -> dict[str, Tensor]:
272
+
273
+ # - [batch_size, seq_len, hidden_size]
274
+ h_arc_head = self.arc_head_mlp(embeddings)
275
+ h_arc_dep = self.arc_dep_mlp(embeddings)
276
+ h_rel_head = self.rel_head_mlp(embeddings)
277
+ h_rel_dep = self.rel_dep_mlp(embeddings)
278
+
279
+ # Share the h vectors between dependency and multi-dependency heads.
280
+ output_ud = self.dependency_head_ud(
281
+ h_arc_head,
282
+ h_arc_dep,
283
+ h_rel_head,
284
+ h_rel_dep,
285
+ gold_arcs=gold_ud,
286
+ null_mask=null_mask,
287
+ padding_mask=padding_mask
288
+ )
289
+ output_eud = self.dependency_head_eud(
290
+ h_arc_head,
291
+ h_arc_dep,
292
+ h_rel_head,
293
+ h_rel_dep,
294
+ gold_arcs=gold_eud,
295
+ # Ignore null mask in E-UD
296
+ null_mask=torch.ones_like(padding_mask),
297
+ padding_mask=padding_mask
298
+ )
299
+
300
+ return {
301
+ 'preds_ud': output_ud["preds"],
302
+ 'preds_eud': output_eud["preds"],
303
+ 'loss_ud': output_ud["loss"],
304
+ 'loss_eud': output_eud["loss"]
305
+ }
encoder.py ADDED
@@ -0,0 +1,109 @@
+ import torch
+ from torch import nn
+ from torch import Tensor, LongTensor
+
+ from transformers import AutoTokenizer, AutoModel
+
+
+ class WordTransformerEncoder(nn.Module):
+     """
+     Encodes sentences into word-level embeddings using a pretrained MLM transformer.
+     """
+     def __init__(self, model_name: str):
+         super().__init__()
+         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+         # A model like BERT, RoBERTa, etc.
+         self.model = AutoModel.from_pretrained(model_name)
+
+     def forward(self, words: list[list[str]]) -> Tensor:
+         """
+         Build word embeddings.
+
+         - Tokenizes input sentences into subtokens.
+         - Passes the subtokens through the pretrained transformer model.
+         - Aggregates subtoken embeddings into word embeddings using mean pooling.
+         """
+         batch_size = len(words)
+
+         # BPE tokenization: split words into subtokens, e.g. ['kidding'] -> ['▁ki', 'dding'].
+         subtokens = self.tokenizer(
+             words,
+             padding=True,
+             truncation=True,
+             is_split_into_words=True,
+             return_tensors='pt'
+         )
+         subtokens = subtokens.to(self.model.device)
+         # Index words from 1 and reserve 0 for special subtokens (e.g. <s>, </s>, padding).
+         # This numbering makes the subsequent aggregation easier.
+         words_ids = torch.stack([
+             torch.tensor(
+                 [word_id + 1 if word_id is not None else 0 for word_id in subtokens.word_ids(batch_idx)],
+                 dtype=torch.long,
+                 device=self.model.device
+             )
+             for batch_idx in range(batch_size)
+         ])
+
+         # Run the model and extract subtoken embeddings from the last layer.
+         subtokens_embeddings = self.model(**subtokens).last_hidden_state
+
+         # Aggregate subtoken embeddings into word embeddings.
+         # [batch_size, n_words, embedding_size]
+         words_embeddings = self._aggregate_subtokens_embeddings(subtokens_embeddings, words_ids)
+         return words_embeddings
+
+     def _aggregate_subtokens_embeddings(
+         self,
+         subtokens_embeddings: Tensor,  # [batch_size, n_subtokens, embedding_size]
+         words_ids: LongTensor          # [batch_size, n_subtokens]
+     ) -> Tensor:
+         """
+         Aggregate subtoken embeddings into word embeddings by averaging.
+
+         This method ensures that multiple subtokens corresponding to a single word are combined
+         into a single embedding.
+         """
+         batch_size, n_subtokens, embedding_size = subtokens_embeddings.shape
+         # The number of words in a sentence plus an "auxiliary" word at the beginning.
+         n_words = torch.max(words_ids) + 1
+
+         words_embeddings = torch.zeros(
+             size=(batch_size, n_words, embedding_size),
+             dtype=subtokens_embeddings.dtype,
+             device=self.model.device
+         )
+         words_ids_expanded = words_ids.unsqueeze(-1).expand(batch_size, n_subtokens, embedding_size)
+
+         # Use scatter_reduce_ to average embeddings of subtokens corresponding to the same word.
+         # All padding and special subtokens are aggregated into the "auxiliary" first embedding,
+         # namely words_embeddings[:, 0, :].
+         words_embeddings.scatter_reduce_(
+             dim=1,
+             index=words_ids_expanded,
+             src=subtokens_embeddings,
+             reduce="mean",
+             include_self=False
+         )
+         # Now remove the auxiliary word at the beginning.
+         words_embeddings = words_embeddings[:, 1:, :]
+         return words_embeddings
+
+     def get_embedding_size(self) -> int:
+         """Returns the embedding size of the transformer model, e.g. 768 for BERT."""
+         return self.model.config.hidden_size
+
+     def get_embeddings_layer(self):
+         """Returns the embeddings module."""
+         return self.model.embeddings
+
+     def get_transformer_layers(self) -> list[nn.Module]:
+         """
+         Return a flat list of all transformer *block* layers, excluding embeddings, poolers, etc.
+         """
+         layers = []
+         for sub in self.model.modules():
+             # Find all ModuleLists (these hold the actual block layers).
+             if isinstance(sub, nn.ModuleList):
+                 layers.extend(list(sub))
+         return layers
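The `word_id + 1` trick and the mean pooling in `_aggregate_subtokens_embeddings` can be illustrated without torch. A minimal pure-Python sketch on hypothetical toy data: bucket 0 collects all special subtokens and is dropped at the end, mirroring `words_embeddings[:, 1:, :]`:

```python
def mean_pool_words(subtoken_embeddings, word_ids):
    """Average subtoken vectors that share a word id; id 0 is a bucket
    for special tokens (CLS/SEP/padding) and is dropped at the end."""
    n_words = max(word_ids) + 1
    dim = len(subtoken_embeddings[0])
    sums = [[0.0] * dim for _ in range(n_words)]
    counts = [0] * n_words
    for vec, wid in zip(subtoken_embeddings, word_ids):
        for d in range(dim):
            sums[wid][d] += vec[d]
        counts[wid] += 1
    means = [
        [s / counts[w] for s in sums[w]] if counts[w] else [0.0] * dim
        for w in range(n_words)
    ]
    # Drop the auxiliary bucket, mirroring words_embeddings[:, 1:, :].
    return means[1:]

# 'kidding' split into two subtokens (word id 1); specials map to id 0.
embs = [[0.0, 0.0], [1.0, 3.0], [3.0, 1.0], [5.0, 5.0], [0.0, 0.0]]
ids  = [0,          1,          1,          2,          0]
print(mean_pool_words(embs, ids))  # [[2.0, 2.0], [5.0, 5.0]]
```

The batched torch version does the same thing in one `scatter_reduce_(reduce="mean")` call over dim 1.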
mlp_classifier.py ADDED
@@ -0,0 +1,46 @@
+ import torch
+ from torch import nn
+ from torch import Tensor, LongTensor
+
+ from transformers.activations import ACT2FN
+
+
+ class MlpClassifier(nn.Module):
+     """Simple feed-forward multilayer perceptron classifier."""
+
+     def __init__(
+         self,
+         input_size: int,
+         hidden_size: int,
+         n_classes: int,
+         activation: str,
+         dropout: float,
+         class_weights: list[float] = None,
+     ):
+         super().__init__()
+
+         self.n_classes = n_classes
+         self.classifier = nn.Sequential(
+             nn.Dropout(dropout),
+             nn.Linear(input_size, hidden_size),
+             ACT2FN[activation],
+             nn.Dropout(dropout),
+             nn.Linear(hidden_size, n_classes)
+         )
+         if class_weights is not None:
+             # CrossEntropyLoss expects floating-point class weights.
+             class_weights = torch.tensor(class_weights, dtype=torch.float)
+         self.cross_entropy = nn.CrossEntropyLoss(weight=class_weights)
+
+     def forward(self, embeddings: Tensor, labels: LongTensor = None) -> dict:
+         logits = self.classifier(embeddings)
+         # Calculate loss.
+         loss = 0.0
+         if labels is not None:
+             # Flatten batch and sequence dimensions to match the expected shapes.
+             loss = self.cross_entropy(
+                 logits.view(-1, self.n_classes),
+                 labels.view(-1)
+             )
+         # Predictions.
+         preds = logits.argmax(dim=-1)
+         return {'preds': preds, 'loss': loss}
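The `view(-1, n_classes)` reshape above simply flattens the batch and sequence dimensions so that every token becomes one classification example; the mean loss is the same either way. A hypothetical pure-Python check of that equivalence, using a hand-rolled softmax cross-entropy in place of `nn.CrossEntropyLoss`:

```python
import math

def cross_entropy(logits, label):
    """Numerically stable softmax cross-entropy for a single token."""
    z = max(logits)
    log_sum = z + math.log(sum(math.exp(l - z) for l in logits))
    return log_sum - logits[label]

# Toy [batch=2, seq=2, n_classes=3] logits and [2, 2] labels.
batch_logits = [[[2.0, 0.5, 0.1], [0.2, 1.5, 0.3]],
                [[1.0, 1.0, 1.0], [0.1, 0.2, 3.0]]]
batch_labels = [[0, 1], [2, 2]]

# Flatten to [batch*seq, n_classes] and [batch*seq] -- what view(-1, C) does.
flat_logits = [tok for sent in batch_logits for tok in sent]
flat_labels = [lab for sent in batch_labels for lab in sent]

mean_loss = sum(
    cross_entropy(l, y) for l, y in zip(flat_logits, flat_labels)
) / len(flat_labels)
print(round(mean_loss, 4))
```

`nn.CrossEntropyLoss` with its default `reduction="mean"` computes exactly this per-token average over the flattened tensors.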
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:febee3c2fe78451b6d0779baefe969f989827af002913b4f53382d6ec1220fee
+ size 1164706348
modeling_parser.py ADDED
@@ -0,0 +1,171 @@
+ from torch import nn
+ from torch import LongTensor
+ from transformers import PreTrainedModel
+
+ from .configuration import CobaldParserConfig
+ from .encoder import WordTransformerEncoder
+ from .mlp_classifier import MlpClassifier
+ from .dependency_classifier import DependencyClassifier
+ from .utils import (
+     build_padding_mask,
+     build_null_mask,
+     prepend_cls,
+     remove_nulls,
+     add_nulls
+ )
+
+
+ class CobaldParser(PreTrainedModel):
+     """Morpho-syntax-semantic parser."""
+
+     config_class = CobaldParserConfig
+
+     def __init__(self, config: CobaldParserConfig):
+         super().__init__(config)
+
+         self.encoder = WordTransformerEncoder(
+             model_name=config.encoder_model_name
+         )
+         embedding_size = self.encoder.get_embedding_size()
+
+         self.classifiers = nn.ModuleDict()
+         self.classifiers["null"] = MlpClassifier(
+             input_size=embedding_size,
+             hidden_size=config.null_classifier_hidden_size,
+             n_classes=config.consecutive_null_limit + 1,
+             activation=config.activation,
+             dropout=config.dropout
+         )
+         if "lemma_rule" in config.vocabulary:
+             self.classifiers["lemma_rule"] = MlpClassifier(
+                 input_size=embedding_size,
+                 hidden_size=config.lemma_classifier_hidden_size,
+                 n_classes=len(config.vocabulary["lemma_rule"]),
+                 activation=config.activation,
+                 dropout=config.dropout
+             )
+         if "joint_feats" in config.vocabulary:
+             self.classifiers["joint_feats"] = MlpClassifier(
+                 input_size=embedding_size,
+                 hidden_size=config.morphology_classifier_hidden_size,
+                 n_classes=len(config.vocabulary["joint_feats"]),
+                 activation=config.activation,
+                 dropout=config.dropout
+             )
+         if "ud_deprel" in config.vocabulary or "eud_deprel" in config.vocabulary:
+             self.classifiers["syntax"] = DependencyClassifier(
+                 input_size=embedding_size,
+                 hidden_size=config.dependency_classifier_hidden_size,
+                 n_rels_ud=len(config.vocabulary["ud_deprel"]),
+                 n_rels_eud=len(config.vocabulary["eud_deprel"]),
+                 activation=config.activation,
+                 dropout=config.dropout
+             )
+         if "misc" in config.vocabulary:
+             self.classifiers["misc"] = MlpClassifier(
+                 input_size=embedding_size,
+                 hidden_size=config.misc_classifier_hidden_size,
+                 n_classes=len(config.vocabulary["misc"]),
+                 activation=config.activation,
+                 dropout=config.dropout
+             )
+         if "deepslot" in config.vocabulary:
+             self.classifiers["deepslot"] = MlpClassifier(
+                 input_size=embedding_size,
+                 hidden_size=config.deepslot_classifier_hidden_size,
+                 n_classes=len(config.vocabulary["deepslot"]),
+                 activation=config.activation,
+                 dropout=config.dropout
+             )
+         if "semclass" in config.vocabulary:
+             self.classifiers["semclass"] = MlpClassifier(
+                 input_size=embedding_size,
+                 hidden_size=config.semclass_classifier_hidden_size,
+                 n_classes=len(config.vocabulary["semclass"]),
+                 activation=config.activation,
+                 dropout=config.dropout
+             )
+
+     def forward(
+         self,
+         words: list[list[str]],
+         counting_masks: LongTensor = None,
+         lemma_rules: LongTensor = None,
+         joint_feats: LongTensor = None,
+         deps_ud: LongTensor = None,
+         deps_eud: LongTensor = None,
+         miscs: LongTensor = None,
+         deepslots: LongTensor = None,
+         semclasses: LongTensor = None,
+         sent_ids: list[str] = None,
+         texts: list[str] = None,
+         inference_mode: bool = False
+     ) -> dict:
+         output = {}
+
+         # The extra [CLS] token accounts for the case when #NULL is the first token in a sentence.
+         words_with_cls = prepend_cls(words)
+         words_without_nulls = remove_nulls(words_with_cls)
+         # Embeddings of words without nulls.
+         embeddings_without_nulls = self.encoder(words_without_nulls)
+         # Predict nulls.
+         null_output = self.classifiers["null"](embeddings_without_nulls, counting_masks)
+         output["counting_mask"] = null_output['preds']
+         output["loss"] = null_output["loss"]
+
+         # "Teacher forcing": during training, pass the original words (with gold nulls)
+         # to the classification heads, so that they are trained on correct sentences.
+         if inference_mode:
+             # Restore predicted nulls in the original sentences.
+             output["words"] = add_nulls(words, null_output["preds"])
+         else:
+             output["words"] = words
+
+         # Encode words with nulls.
+         # [batch_size, seq_len, embedding_size]
+         embeddings = self.encoder(output["words"])
+
+         # Predict lemmas and morphological features.
+         if "lemma_rule" in self.classifiers:
+             lemma_output = self.classifiers["lemma_rule"](embeddings, lemma_rules)
+             output["lemma_rules"] = lemma_output['preds']
+             output["loss"] += lemma_output['loss']
+
+         if "joint_feats" in self.classifiers:
+             joint_feats_output = self.classifiers["joint_feats"](embeddings, joint_feats)
+             output["joint_feats"] = joint_feats_output['preds']
+             output["loss"] += joint_feats_output['loss']
+
+         # Predict syntax.
+         if "syntax" in self.classifiers:
+             padding_mask = build_padding_mask(output["words"], self.device)
+             null_mask = build_null_mask(output["words"], self.device)
+             deps_output = self.classifiers["syntax"](
+                 embeddings,
+                 deps_ud,
+                 deps_eud,
+                 null_mask,
+                 padding_mask
+             )
+             output["deps_ud"] = deps_output['preds_ud']
+             output["deps_eud"] = deps_output['preds_eud']
+             output["loss"] += deps_output['loss_ud'] + deps_output['loss_eud']
+
+         # Predict miscellaneous features.
+         if "misc" in self.classifiers:
+             misc_output = self.classifiers["misc"](embeddings, miscs)
+             output["miscs"] = misc_output['preds']
+             output["loss"] += misc_output['loss']
+
+         # Predict semantics.
+         if "deepslot" in self.classifiers:
+             deepslot_output = self.classifiers["deepslot"](embeddings, deepslots)
+             output["deepslots"] = deepslot_output['preds']
+             output["loss"] += deepslot_output['loss']
+
+         if "semclass" in self.classifiers:
+             semclass_output = self.classifiers["semclass"](embeddings, semclasses)
+             output["semclasses"] = semclass_output['preds']
+             output["loss"] += semclass_output['loss']
+
+         return output
runs/Jun02_11-26-31_b20c304d4aee/events.out.tfevents.1748863678.b20c304d4aee.2886.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eddd2088faf8cac4a436178cb64c6c356d5827c7c000965e281975fa7a538139
+ size 75520
runs/Jun02_11-29-35_b20c304d4aee/events.out.tfevents.1748863798.b20c304d4aee.3759.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:30258dcc4cdce035babfb0897cfd1fce99d5062bc55d625d7adf6ef1a6512840
+ size 75520
runs/Jun02_11-31-40_b20c304d4aee/events.out.tfevents.1748863923.b20c304d4aee.4331.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:edaf96e6cfce003aec189d98126bd8fb9edf21f3d141d263f4db1346b0c6f6ac
+ size 75520
runs/Jun02_11-39-26_b20c304d4aee/events.out.tfevents.1748864395.b20c304d4aee.6344.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9585e236154a2879b08dbc15b3237c6e656e60bbf31302cbdcc32993a9a5add8
+ size 79755
runs/Jun02_11-41-53_b20c304d4aee/events.out.tfevents.1748864550.b20c304d4aee.7023.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3e2af7a645cc472b5e54ea09fb372999aaa1783101f270edf6081a31ecaa33b
+ size 81553
runs/Jun02_11-56-41_b20c304d4aee/events.out.tfevents.1748865428.b20c304d4aee.10833.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5e66c27937281332273136813d0fc467f224d417b54fa1cc597d499a546436f9
+ size 81553
runs/Jun02_12-01-23_b20c304d4aee/events.out.tfevents.1748865720.b20c304d4aee.12053.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0be88191e4962ed67156cae4d329e9e09d27353bb4417d6ac99e949ea5d92564
+ size 81553
runs/Jun02_12-03-50_b20c304d4aee/events.out.tfevents.1748865865.b20c304d4aee.12757.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:709139cf6f00317f591bd1da7d16d662f7d608c7aa91d298ee9b02112a53d51c
+ size 79757
runs/Jun02_12-05-59_b20c304d4aee/events.out.tfevents.1748865998.b20c304d4aee.13334.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b5f75e015208d98ca0878082206cf1579de9258480fa6d1559cf29462ba7c64
+ size 88206
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:51f7e6a2220ee8f18ef289db60b31fa8ec735bb4c9ccd3fafebd3d7a812071a1
+ size 5496
utils.py ADDED
@@ -0,0 +1,69 @@
+ import torch
+ from torch import Tensor
+
+
+ def pad_sequences(sequences: list[Tensor], padding_value: int) -> Tensor:
+     """
+     Stack 1d tensors (sequences) into a single 2d tensor, padding each sequence on the right.
+     """
+     return torch.nn.utils.rnn.pad_sequence(sequences, padding_value=padding_value, batch_first=True)
+
+
+ def _build_condition_mask(sentences: list[list[str]], condition_fn: callable, device) -> Tensor:
+     masks = [
+         torch.tensor([condition_fn(word) for word in sentence], dtype=bool, device=device)
+         for sentence in sentences
+     ]
+     return pad_sequences(masks, padding_value=False)
+
+ def build_padding_mask(sentences: list[list[str]], device) -> Tensor:
+     return _build_condition_mask(sentences, condition_fn=lambda word: True, device=device)
+
+ def build_null_mask(sentences: list[list[str]], device) -> Tensor:
+     return _build_condition_mask(sentences, condition_fn=lambda word: word != "#NULL", device=device)
+
+
+ def pairwise_mask(masks1d: Tensor) -> Tensor:
+     """
+     Calculate the outer product of a mask, i.e. masks2d[:, i, j] = masks1d[:, i] & masks1d[:, j].
+     """
+     return masks1d[:, None, :] & masks1d[:, :, None]
+
+
+ # Credits: https://docs.allennlp.org/main/api/nn/util/#replace_masked_values
+ def replace_masked_values(tensor: Tensor, mask: Tensor, replace_with: float):
+     """
+     Replace all masked values in tensor with `replace_with` (in place).
+     """
+     assert tensor.dim() == mask.dim(), f"tensor.dim() of {tensor.dim()} != mask.dim() of {mask.dim()}"
+     tensor.masked_fill_(~mask, replace_with)
+
+
+ def prepend_cls(sentences: list[list[str]]) -> list[list[str]]:
+     """
+     Return a copy of sentences with a [CLS] token prepended.
+     """
+     return [["[CLS]", *sentence] for sentence in sentences]
+
+ def remove_nulls(sentences: list[list[str]]) -> list[list[str]]:
+     """
+     Return a copy of sentences with nulls removed.
+     """
+     return [[word for word in sentence if word != "#NULL"] for sentence in sentences]
+
+ def add_nulls(sentences: list[list[str]], counting_masks) -> list[list[str]]:
+     """
+     Return a copy of sentences with nulls restored according to the counting masks.
+     """
+     sentences_with_nulls = []
+     for sentence, counting_mask in zip(sentences, counting_masks, strict=True):
+         sentence_with_nulls = []
+         assert 0 < len(counting_mask)
+         # Account for the leading auxiliary ([CLS]) token: counting_mask[0] counts
+         # the nulls that precede the first word.
+         sentence_with_nulls.extend(["#NULL"] * counting_mask[0])
+         for word, n_nulls_to_insert in zip(sentence, counting_mask[1:], strict=True):
+             sentence_with_nulls.append(word)
+             sentence_with_nulls.extend(["#NULL"] * n_nulls_to_insert)
+         sentences_with_nulls.append(sentence_with_nulls)
+     return sentences_with_nulls
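The counting mask consumed by `add_nulls` has one slot per word plus a leading slot for nulls before the first word (courtesy of the prepended [CLS] token). A minimal single-sentence sketch of the same restoration logic, on hypothetical data:

```python
def restore_nulls(sentence, counting_mask):
    """counting_mask[0] = number of #NULL tokens before the first word;
    counting_mask[i + 1] = number of #NULL tokens after word i."""
    restored = ["#NULL"] * counting_mask[0]
    for word, n_nulls in zip(sentence, counting_mask[1:]):
        restored.append(word)
        restored.extend(["#NULL"] * n_nulls)
    return restored

# One elided token predicted after 'tea' (e.g. an elided verb in a gapping construction).
print(restore_nulls(["Mary", "wants", "tea"], [0, 0, 0, 1]))
# ['Mary', 'wants', 'tea', '#NULL']
```

At inference time the parser predicts one such mask per sentence and re-encodes the restored sentences before running the remaining classification heads.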