Spaces:

AUEB-NLP
/

The-Greek-NLP-API

Sleeping

App Files Files Community

eloukas committed on Sep 1, 2024

Commit

92218bf

1 Parent(s): 1430cec

Add files

Browse files

Files changed (1) hide show

test_run.py +181 -0

test_run.py ADDED Viewed

	@@ -0,0 +1,181 @@

+# !pip install gr-nlp-toolkit
+from gr_nlp_toolkit import Pipeline
+# Shared module-level pipeline, built once at import with the "pos,ner,dp,g2g" processor string; every function below calls this same object.
+nlp_pos_ner_dp_with_g2g = Pipeline("pos,ner,dp,g2g")
+def greeklish_to_greek(text: str) -> str:
+    """
+    Convert Greeklish (Greek written with Latin characters) to Greek. ("larisa" -> "λαρισα")
+    Args:
+        text (str): The Greeklish text to convert.
+    Returns:
+        str: The transliterated Greek text.
+    Examples:
+        >>> greeklish_to_greek("H thessaloniki einai wraia polh")
+        'η θεσσαλονικη ειναι ωραια πολη'
+    """
+    doc = nlp_pos_ner_dp_with_g2g(text)
+    return " ".join([token.text for token in doc.tokens])
+def process_ner(text: str) -> dict:
+    """
+    Process text to extract Named Entity Recognition (NER) information.
+    Args:
+        text (str): The text to process.
+    Returns:
+        dict: A dictionary with the text and the NER value.
+    Examples:
+        >>> process_ner("Η Αργεντινή κέρδισε το Παγκόσμιο Κύπελλο το 2022")
+        {
+            'η': 'O',
+            'αργεντινη': 'S-ORG',
+            'κερδισε': 'O',
+            'το': 'O',
+            'παγκοσμιο': 'B-EVENT',
+            'κυπελλο': 'E-EVENT',
+            'το': 'O',
+            '2022': 'S-DATE'
+        }
+    NER Possible Labels List:
+        ner_labels = [
+            'O', 'S-GPE', 'S-ORG', 'S-CARDINAL', 'B-ORG', 'E-ORG', 'B-DATE', 'E-DATE', 'S-NORP',
+            'B-GPE', 'E-GPE', 'S-EVENT', 'S-DATE', 'S-PRODUCT', 'S-LOC', 'I-ORG', 'S-PERSON',
+            'S-ORDINAL', 'B-PERSON', 'I-PERSON', 'E-PERSON', 'B-LAW', 'I-LAW', 'E-LAW', 'B-MONEY',
+            'I-MONEY', 'E-MONEY', 'B-EVENT', 'I-EVENT', 'E-EVENT', 'B-FAC', 'E-FAC', 'I-DATE',
+            'S-PERCENT', 'B-QUANTITY', 'E-QUANTITY', 'B-WORK_OF_ART', 'I-WORK_OF_ART', 'E-WORK_OF_ART',
+            'I-FAC', 'S-LAW', 'S-TIME', 'B-LOC', 'E-LOC', 'I-LOC', 'S-FAC', 'B-TIME', 'E-TIME',
+            'S-WORK_OF_ART', 'B-PRODUCT', 'E-PRODUCT', 'B-CARDINAL', 'E-CARDINAL', 'S-MONEY',
+            'S-LANGUAGE', 'I-TIME', 'I-PRODUCT', 'I-GPE', 'I-QUANTITY', 'B-NORP', 'E-NORP',
+            'S-QUANTITY', 'B-PERCENT', 'I-PERCENT', 'E-PERCENT', 'I-CARDINAL', 'B-ORDINAL',
+            'I-ORDINAL', 'E-ORDINAL'
+        ]
+    """
+    doc = nlp_pos_ner_dp_with_g2g(text)
+    ner_dict = {token.text: token.ner for token in doc.tokens}
+    return ner_dict
+def process_pos(text: str) -> dict:
+    """
+    Process text to extract Part-of-Speech information (UPOS tags and morphological features).
+    # Complete list of UPOS (https://universaldependencies.org/u/pos/ & https://github.com/nlpaueb/gr-nlp-toolkit/blob/main/gr_nlp_toolkit/configs/pos_labels.py)
+        ADJ: adjective
+        ADP: adposition
+        ADV: adverb
+        AUX: auxiliary
+        CCONJ: coordinating conjunction
+        DET: determiner
+        INTJ: interjection
+        NOUN: noun
+        NUM: numeral
+        PART: particle
+        PRON: pronoun
+        PROPN: proper noun
+        PUNCT: punctuation
+        SCONJ: subordinating conjunction
+        SYM: symbol
+        VERB: verb
+        X: other
+    # Complete list of the morphological features can be found here: (https://github.com/nlpaueb/gr-nlp-toolkit/blob/main/gr_nlp_toolkit/configs/pos_labels.py
+    Due to the large number of features, only the most common ones are listed here:
+        - Aspect
+        - Case
+        - Definite
+        - Mood
+        - Number
+        - Person
+        - PronType
+        - Tense
+        - Gender
+        - VerbForm
+        - Voice
+    Args:
+        text (str): The text to process.
+    Returns:
+        dict: A dictionary with the text and the POS information, containing UPOS and morphological features as keys.
+    Examples:
+         >>> process_pos("Μου αρέσει να διαβάζω τα post του Andrew Ng στο Twitter.")
+        {
+            'μου': {'UPOS': 'PRON', 'Morphological_Features': {'Case': 'Gen', 'Gender': 'Masc', 'Number': 'Sing', 'Person': '1', 'Poss': '_', 'PronType': 'Prs'}},
+            'αρεσει': {'UPOS': 'VERB', 'Morphological_Features': {'Aspect': 'Imp', 'Case': '_', 'Gender': '_', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '3', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Act'}},
+            'να': {'UPOS': 'AUX', 'Morphological_Features': {'Aspect': '_', 'Mood': '_', 'Number': '_', 'Person': '_', 'Tense': '_', 'VerbForm': '_', 'Voice': '_'}},
+            'διαβαζω': {'UPOS': 'VERB', 'Morphological_Features': {'Aspect': 'Imp', 'Case': '_', 'Gender': '_', 'Mood': 'Ind', 'Number': 'Sing', 'Person': '1', 'Tense': 'Pres', 'VerbForm': 'Fin', 'Voice': 'Act'}},
+            'τα': {'UPOS': 'DET', 'Morphological_Features': {'Case': 'Acc', 'Definite': 'Def', 'Gender': 'Neut', 'Number': 'Plur', 'PronType': 'Art'}},
+            'post': {'UPOS': 'X', 'Morphological_Features': {'Foreign': 'Yes'}},
+            'του': {'UPOS': 'DET', 'Morphological_Features': {'Case': 'Gen', 'Definite': 'Def', 'Gender': 'Masc', 'Number': 'Sing', 'PronType': 'Art'}},
+            'andrew': {'UPOS': 'X', 'Morphological_Features': {'Foreign': 'Yes'}},
+            'ng': {'UPOS': 'X', 'Morphological_Features': {'Foreign': 'Yes'}},
+            'στο': {'UPOS': '_', 'Morphological_Features': {}},
+            'twitter': {'UPOS': 'X', 'Morphological_Features': {'Foreign': 'Yes'}},
+            '.': {'UPOS': 'PUNCT', 'Morphological_Features': {}}
+        }
+    """
+    doc = nlp_pos_ner_dp_with_g2g(text)
+    pos_dict = {
+        token.text: {"UPOS": token.upos, "Morphological_Features": token.feats}
+        for token in doc.tokens
+    }
+    return pos_dict
+def process_dp(text: str) -> dict:
+    """
+    Process text to extract Dependency Parsing information.
+    This method analyzes the given text and returns dependency parsing information for each word,
+    including its syntactic head and dependency relation.
+    Args:
+        text (str): The text to process.
+    Returns:
+        dict: A dictionary where each key is a word from the input text, and the value is another
+            dictionary containing:
+                - 'Head': The position of the syntactic head of the word (0 indicates the root).
+                - 'Deprel': The dependency relation to the head.
+    Examples:
+        >>> process_dp("Προτιμώ την πρωινή πτήση από την Αθήνα στη Θεσσαλονίκη.")
+        {
+            'προτιμω': {'Head': 0, 'Deprel': 'root'},
+            'την': {'Head': 4, 'Deprel': 'det'},
+            'πρωινη': {'Head': 4, 'Deprel': 'amod'},
+            'πτηση': {'Head': 1, 'Deprel': 'obj'},
+            'απο': {'Head': 7, 'Deprel': 'case'},
+            'την': {'Head': 7, 'Deprel': 'det'},
+            'αθηνα': {'Head': 4, 'Deprel': 'nmod'},
+            'στη': {'Head': 9, 'Deprel': 'case'},
+            'θεσσαλονικη': {'Head': 4, 'Deprel': 'nmod'},
+            '.': {'Head': 1, 'Deprel': 'punct'}
+        }
+    Dependency Parsing Possible Labels List:
+        dp_labels = [
+            'obl', 'obj', 'dep', 'mark', 'case', 'flat', 'nummod', 'obl:arg', 'punct', 'cop',
+            'acl:relcl', 'expl', 'nsubj', 'csubj:pass', 'root', 'advmod', 'nsubj:pass', 'ccomp',
+            'conj', 'amod', 'xcomp', 'aux', 'appos', 'csubj', 'fixed', 'nmod', 'iobj', 'parataxis',
+            'orphan', 'det', 'advcl', 'vocative', 'compound', 'cc', 'discourse', 'acl', 'obl:agent'
+        ]
+    """
+    doc = nlp_pos_ner_dp_with_g2g(text)
+    dp_dict = {
+        token.text: {"Head": token.head, "Deprel": token.deprel} for token in doc.tokens
+    }
+    return dp_dict