Spaces:
Runtime error
Runtime error
def parse_verb_morphology(morph):
    """Decode a finite-verb tag such as ``V11PRESACTIVEIND3S``.

    The tag is read left to right: the first three characters are skipped,
    then come a tense code, an optional voice code (``ACTIVE``/``PASSIVE``),
    a three-letter mood code, one person character and one number character.

    Any field that cannot be decoded (unknown code or tag too short) is
    reported as the string ``"PROBLEM"`` instead of raising.

    Args:
        morph: The tag string to decode.

    Returns:
        A dict with the keys ``pos`` (always ``"Verb"``), ``tense``,
        ``voice``, ``mood``, ``person`` and ``number``.
    """
    # Four-letter tense codes that all occupy morph[3:7].
    tenses = {
        "PRES": "Present",
        "IMPF": "Imperfect",
        "PLUP": "Pluperfect",
        "PERF": "Perfect",  # was mislabelled "Pluperfect"
    }
    moods = {
        "SUB": "Subjunctive",
        "IND": "Indicative",
        "IMP": "Imperative",
        "INF": "Infinitive",
    }
    persons = {
        "0": "Undeclined",
        "1": "First",
        "2": "Second",
        "3": "Third",
    }
    numbers = {
        "S": "Singular",
        "P": "Plural",
        "X": "Infinitive",
    }

    # Tense consumption
    offset = 7
    tense_code = morph[3:7]
    if tense_code in tenses:
        tense = tenses[tense_code]
    elif tense_code == "FUTP" and morph[3:13] != "FUTPASSIVE":
        # "FUTP..." is future perfect unless the P merely starts "PASSIVE".
        tense = "FuturePerfect"
    elif morph[3:6] == "FUT":
        offset = 6
        tense = "Future"
    elif morph == "V99XXX0X":
        # Placeholder tag: only the tense gets a dedicated label; its
        # remaining fields do not follow the regular layout and therefore
        # come back as "PROBLEM" below.
        tense = "Undeclined"
    else:
        tense = "PROBLEM"

    # Voice consumption
    if morph[offset:offset + 6] == "ACTIVE":
        voice = "Active"
        offset += 6
    elif morph[offset:offset + 7] == "PASSIVE":
        voice = "Passive"
        offset += 7
    elif morph[offset:offset + 3] in moods:
        # Deponent verbs - Such verbs occur in passive voice but are
        # translated in active voice. No voice code is present, so the
        # offset stays on the mood code.
        voice = "Active"
    else:
        voice = "PROBLEM"

    # Mood consumption
    mood = moods.get(morph[offset:offset + 3], "PROBLEM")
    offset += 3

    # Person / number consumption. One-character slices are used instead of
    # indexing so that a tag that ends early yields "PROBLEM" rather than
    # an IndexError.
    person = persons.get(morph[offset:offset + 1], "PROBLEM")
    offset += 1
    number = numbers.get(morph[offset:offset + 1], "PROBLEM")

    return {
        'pos': "Verb",
        'tense': tense,
        'voice': voice,
        'mood': mood,
        'person': person,
        'number': number
    }
def parse_verb_participle_morphology(morph):
    """Decode a participle tag such as ``VPAR31NOMSMPRESACTIVEPPL``.

    The first six characters are skipped; then come a three-letter case
    code, one number character, one gender character, and the rest of the
    tag, which names the participle type.

    Any field that cannot be decoded (unknown code or tag too short) is
    reported as the string ``"PROBLEM"`` instead of raising.

    Args:
        morph: The tag string to decode.

    Returns:
        A dict with the keys ``pos`` (always ``"Participle"``), ``case``,
        ``number``, ``gender`` and ``participle_type``.
    """
    cases = {
        'NOM': 'Nominative',
        'GEN': 'Genitive',
        'DAT': 'Dative',
        'ACC': 'Accusative',
        'ABL': 'Ablative',
        'VOC': 'Vocative',
        'LOC': 'Locative',
    }
    numbers = {
        'S': 'Singular',
        'P': 'Plural',
        'X': 'Infinitive',
    }
    genders = {
        'M': 'Masculine',
        'F': 'Feminine',
        'N': 'Neuter',
        'X': 'Unknown',
        'C': 'Common',
    }
    # NOTE: 'Present Active' contains a space, unlike the other labels. It is
    # kept exactly as it was because callers may depend on the literal text.
    participle_types = {
        'FUTPPL': 'FutureActive',
        'FUTACTIVEPPL': 'FutureActive',
        'PRESPPL': 'Present Active',
        'PRESACTIVEPPL': 'Present Active',
        'PERFPPL': 'PerfectActive',
        'PERFACTIVEPPL': 'PerfectActive',
        'FUTPASSIVEPPL': 'FuturePassive',
        'PERFPASSIVEPPL': 'PerfectPassive',
    }

    offset = 6

    # Case consumption
    v_case = cases.get(morph[offset:offset + 3], "PROBLEM")
    offset += 3

    # Number / gender consumption. One-character slices are used instead of
    # indexing so that a tag that ends early yields "PROBLEM" rather than
    # an IndexError.
    number = numbers.get(morph[offset:offset + 1], 'PROBLEM')
    offset += 1
    gender = genders.get(morph[offset:offset + 1], 'PROBLEM')
    offset += 1

    # Everything that is left names the participle type.
    participle_type = participle_types.get(morph[offset:], 'PROBLEM')

    return {
        'pos': "Participle",
        'case': v_case,
        'number': number,
        'gender': gender,
        'participle_type': participle_type
    }
def parse_noun_morphology(morph):
    """Decode a noun tag such as ``N11NOMSF``.

    The first three characters are skipped; then come a case code, one
    number character and one gender character. The case code is normally
    three letters; a single ``X`` in that position means "Undeclined" and
    occupies only one character.

    Any field that cannot be decoded (unknown code or tag too short) is
    reported as the string ``"PROBLEM"`` instead of raising.

    Args:
        morph: The tag string to decode.

    Returns:
        A dict with the keys ``pos`` (always ``"Noun"``), ``case``,
        ``number`` and ``gender``.
    """
    cases = {
        'NOM': 'Nominative',
        'GEN': 'Genitive',
        'DAT': 'Dative',
        'ACC': 'Accusative',
        'ABL': 'Ablative',
        'VOC': 'Vocative',
        'LOC': 'Locative',
    }
    numbers = {
        'S': 'Singular',
        'P': 'Plural',
        'X': 'Infinitive',
    }
    genders = {
        'M': 'Masculine',
        'F': 'Feminine',
        'N': 'Neuter',
        'X': 'Unknown',
        'C': 'Common',
    }

    offset = 3

    # Case consumption
    case_code = morph[offset:offset + 3]
    if case_code in cases:
        v_case = cases[case_code]
        offset += 3
    elif case_code[:1] == 'X':
        # The placeholder is a single character, so advance by one only.
        v_case = 'Undeclined'
        offset += 1
    else:
        v_case = "PROBLEM"
        offset += 3

    # Number / gender consumption. One-character slices are used instead of
    # indexing so that a tag that ends early yields "PROBLEM" rather than
    # an IndexError.
    number = numbers.get(morph[offset:offset + 1], 'PROBLEM')
    offset += 1
    gender = genders.get(morph[offset:offset + 1], 'PROBLEM')

    return {
        'pos': "Noun",
        'case': v_case,
        'number': number,
        'gender': gender,
    }
def parse_adjective_morphology(morph):
    """Decode an adjective tag such as ``ADJ11NOMSMPOS``.

    The first five characters are skipped; then come a case code, one
    number character, one gender character, and the rest of the tag, which
    is the degree of comparison. The case code is normally three letters; a
    single ``X`` in that position means "Undeclined" and occupies only one
    character.

    Any field that cannot be decoded (unknown code or tag too short) is
    reported as the string ``"PROBLEM"`` instead of raising. When the
    comparison suffix is not recognised the whole tag is also printed to
    standard output.

    Args:
        morph: The tag string to decode.

    Returns:
        A dict with the keys ``pos`` (always ``"Adjective"``), ``case``,
        ``number``, ``gender`` and ``comparison``.
    """
    cases = {
        'NOM': 'Nominative',
        'GEN': 'Genitive',
        'DAT': 'Dative',
        'ACC': 'Accusative',
        'ABL': 'Ablative',
        'VOC': 'Vocative',
        'LOC': 'Locative',
    }
    numbers = {
        'S': 'Singular',
        'P': 'Plural',
        'X': 'Infinitive',
    }
    genders = {
        'M': 'Masculine',
        'F': 'Feminine',
        'N': 'Neuter',
        'X': 'Unknown',
        'C': 'Common',
    }
    comparisons = {
        'POS': "Positive",
        'COMP': "Comparative",
        'SUPER': "Superlative",
        'X': 'Unknown',
    }

    offset = 5

    # Case consumption
    case_code = morph[offset:offset + 3]
    if case_code in cases:
        v_case = cases[case_code]
        offset += 3
    elif case_code[:1] == 'X':
        # The placeholder is a single character, so advance by one only.
        v_case = 'Undeclined'
        offset += 1
    else:
        v_case = "PROBLEM"
        offset += 3

    # Number / gender consumption. One-character slices are used instead of
    # indexing so that a tag that ends early yields "PROBLEM" rather than
    # an IndexError.
    number = numbers.get(morph[offset:offset + 1], 'PROBLEM')
    offset += 1
    gender = genders.get(morph[offset:offset + 1], 'PROBLEM')
    offset += 1

    # Comparison consumption: whatever is left must match a code exactly.
    suffix = morph[offset:]
    if suffix in comparisons:
        comparison = comparisons[suffix]
    else:
        comparison = "PROBLEM"
        print(morph)

    return {
        'pos': "Adjective",
        'case': v_case,
        'number': number,
        'gender': gender,
        'comparison': comparison
    }
def parse_pronoun_morphology(morph):
    """Decode a pronoun tag such as ``PRON31NOMPM``.

    The first six characters are skipped; then come a case code, one number
    character and one gender character. The case code is normally three
    letters; a single ``X`` in that position means "Undeclined" and
    occupies only one character.

    Any field that cannot be decoded (unknown code or tag too short) is
    reported as the string ``"PROBLEM"`` instead of raising.

    Args:
        morph: The tag string to decode.

    Returns:
        A dict with the keys ``pos`` (always ``"Pronoun"``), ``case``,
        ``number`` and ``gender``.
    """
    cases = {
        'NOM': 'Nominative',
        'GEN': 'Genitive',
        'DAT': 'Dative',
        'ACC': 'Accusative',
        'ABL': 'Ablative',
        'VOC': 'Vocative',
        'LOC': 'Locative',
    }
    numbers = {
        'S': 'Singular',
        'P': 'Plural',
        'X': 'Infinitive',
    }
    genders = {
        'M': 'Masculine',
        'F': 'Feminine',
        'N': 'Neuter',
        'X': 'Unknown',
        'C': 'Common',
    }

    offset = 6

    # Case consumption
    case_code = morph[offset:offset + 3]
    if case_code in cases:
        v_case = cases[case_code]
        offset += 3
    elif case_code[:1] == 'X':
        # The placeholder is a single character, so advance by one only.
        v_case = 'Undeclined'
        offset += 1
    else:
        v_case = "PROBLEM"
        offset += 3

    # Number / gender consumption. One-character slices are used instead of
    # indexing so that a tag that ends early yields "PROBLEM" rather than
    # an IndexError.
    number = numbers.get(morph[offset:offset + 1], 'PROBLEM')
    offset += 1
    gender = genders.get(morph[offset:offset + 1], 'PROBLEM')

    return {
        'pos': "Pronoun",
        'case': v_case,
        'number': number,
        'gender': gender,
    }
def parse_preposition_morphology(morph):
    """Decode a preposition tag such as ``PREPABL``.

    Everything after the first four characters is taken as the case code.
    Only ``ABL`` and ``ACC`` are recognised; any other remainder is printed
    to standard output and reported as ``"PROBLEM"``.

    Args:
        morph: The tag string to decode.

    Returns:
        A dict with the keys ``pos`` (always ``"Preposition"``) and ``case``.
    """
    known_cases = {'ABL': 'Ablative', 'ACC': 'Accusative'}
    remainder = morph[4:]

    case_label = known_cases.get(remainder)
    if case_label is None:
        # Surface the unrecognised remainder before flagging it.
        print(remainder)
        case_label = 'PROBLEM'

    return {
        'pos': "Preposition",
        'case': case_label
    }
def parse_adverb_morphology(morph):
    """Decode an adverb tag such as ``ADVCOMP``.

    Everything after the first three characters is the degree of
    comparison. An empty remainder counts as positive. An unrecognised
    remainder is reported as ``"PROBLEM"`` and the whole tag is printed to
    standard output.

    Args:
        morph: The tag string to decode.

    Returns:
        A dict with the keys ``pos`` (always ``"Adverb"``) and
        ``comparison``.
    """
    degree_by_suffix = {
        '': "Positive",  # bare tag with nothing after the prefix
        'POS': "Positive",
        'COMP': "Comparative",
        'SUPER': "Superlative",
        'X': 'Unknown',
    }

    degree = degree_by_suffix.get(morph[3:])
    if degree is None:
        degree = "PROBLEM"
        print(morph)

    return {
        'pos': "Adverb",
        'comparison': degree
    }
def parse_supine_morphology(morph):
    """Decode a supine tag.

    The first eight characters are skipped; then come a case code, one
    number character and one gender character (for example, a tag shaped
    like ``SUPINE31ACCSN``). The case code is normally three letters; a
    single ``X`` in that position means "Undeclined" and occupies only one
    character.

    Any field that cannot be decoded (unknown code or tag too short) is
    reported as the string ``"PROBLEM"`` instead of raising.

    Args:
        morph: The tag string to decode.

    Returns:
        A dict with the keys ``pos`` (always ``"Supine"``), ``case``,
        ``number`` and ``gender``.
    """
    cases = {
        'NOM': 'Nominative',
        'GEN': 'Genitive',
        'DAT': 'Dative',
        'ACC': 'Accusative',
        'ABL': 'Ablative',
        'VOC': 'Vocative',
        'LOC': 'Locative',
    }
    numbers = {
        'S': 'Singular',
        'P': 'Plural',
        'X': 'Infinitive',
    }
    genders = {
        'M': 'Masculine',
        'F': 'Feminine',
        'N': 'Neuter',
        'X': 'Unknown',
        'C': 'Common',
    }

    offset = 8

    # Case consumption
    case_code = morph[offset:offset + 3]
    if case_code in cases:
        v_case = cases[case_code]
        offset += 3
    elif case_code[:1] == 'X':
        # The placeholder is a single character, so advance by one only.
        v_case = 'Undeclined'
        offset += 1
    else:
        v_case = "PROBLEM"
        offset += 3

    # Number / gender consumption. One-character slices are used instead of
    # indexing so that a tag that ends early yields "PROBLEM" rather than
    # an IndexError.
    number = numbers.get(morph[offset:offset + 1], 'PROBLEM')
    offset += 1
    gender = genders.get(morph[offset:offset + 1], 'PROBLEM')

    return {
        'pos': "Supine",
        'case': v_case,
        'number': number,
        'gender': gender,
    }
def parse_morphology(morph):
    """Route a tag to the parser for its part of speech.

    The part of speech is recognised from the start of the tag. Checks run
    in a fixed order, so the more specific ``VPAR`` prefix is tested before
    the generic "``V`` + digit" verb pattern.

    Args:
        morph: The tag string to decode.

    Returns:
        ``{}`` when the tag is shorter than two characters, the dict built
        by the matching parser when the tag is recognised, and ``None``
        when nothing matches.
    """
    if len(morph) < 2:
        return {}

    lead3 = morph[0:3]
    lead4 = morph[0:4]
    lead6 = morph[0:6]

    # Participle
    if lead4 == "VPAR":
        return parse_verb_participle_morphology(morph)
    # Adjective
    if lead3 == 'ADJ':
        return parse_adjective_morphology(morph)
    # Adverb
    if lead3 == 'ADV':
        return parse_adverb_morphology(morph)
    # Verb
    # NOTE(review): only second characters '0'..'8' are accepted, so tags
    # starting with "V9" are not treated as verbs and end up returning
    # None - confirm this is intended.
    if morph[0] == 'V' and morph[1] in "012345678":
        return parse_verb_morphology(morph)
    # Noun (any tag that starts with 'N' and was not matched above)
    if morph[0] == 'N':
        return parse_noun_morphology(morph)
    # Pronoun
    if lead4 == 'PRON':
        return parse_pronoun_morphology(morph)
    # Preposition
    if lead4 == 'PREP':
        return parse_preposition_morphology(morph)
    # Supine
    if lead6 == 'SUPINE':
        return parse_supine_morphology(morph)
    if lead6 == 'INTERJ':
        return {'pos': 'Interjection'}
    if morph in ('TACKON', 'PREFIX', 'SUFFIX'):
        return {'pos': 'Enclitic'}
    if morph in ('OTHER', 'CONJ'):
        return {'pos': morph.capitalize()}
    # Unrecognised tag.
    return None
def morph_to_string(morph):
    """Flatten a parsed-morphology dict into a single label string.

    For inflected parts of speech the label is the part of speech followed
    by its field values, joined with underscores (for example
    ``"Noun_Nominative_Singular_Feminine"``). The remaining parts of speech
    map to a fixed label.

    Args:
        morph: A dict with at least a ``pos`` key, or ``{}`` / ``None``.

    Returns:
        ``""`` for ``{}`` or ``None``, the label string for a known part of
        speech, and ``None`` for an unknown one.
    """
    if morph is None or morph == {}:
        return ""

    # Which fields follow the part of speech, and in what order.
    field_order = {
        'Verb': ('tense', 'voice', 'mood', 'person', 'number'),
        'Participle': ('case', 'number', 'gender', 'participle_type'),
        'Noun': ('case', 'number', 'gender'),
        'Adjective': ('case', 'number', 'gender', 'comparison'),
        'Pronoun': ('case', 'number', 'gender'),
        'Preposition': ('case',),
        'Adverb': ('comparison',),
        'Supine': ('case', 'number', 'gender'),
    }
    # Parts of speech that carry no fields and map to a constant label.
    constant_labels = {
        'Enclitic': 'Enclitic',
        'Other': 'OTHER',
        'Conj': 'Conjunction',
        'Interjection': 'Interjection',
    }

    pos = morph['pos']
    if pos in field_order:
        pieces = [pos]
        for key in field_order[pos]:
            pieces.append(morph[key])
        return "_".join(pieces)
    return constant_labels.get(pos)
def simplify_form(morph):
    """Turn a raw tag into its flattened label string.

    Runs the tag through ``parse_morphology`` and feeds the result to
    ``morph_to_string``.

    Args:
        morph: The tag string to decode.

    Returns:
        Whatever ``morph_to_string`` returns for the parsed tag.
    """
    parsed = parse_morphology(morph)
    return morph_to_string(parsed)