LanguageDetection

Sleeping

App Files Files Community

TiberiuCristianLeon committed on Jan 5

Commit

6f0b2fb

verified ·

1 Parent(s): 5a601ab

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -10

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Rom
 iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
 DEFAULTS = None
-libraries = ["langdetect", "langid", "lingua-py"]
 class Detect():
     def __init__(self, text: str) -> None:
@@ -21,7 +21,7 @@ class Detect():
         DetectorFactory.seed = 0
         langcode = detect(self.text)
         langecode_probabilities: list[Language] = detect_langs(self.text)
-        return langcode, round(number=langecode_probabilities[0].prob * 100, ndigits=2)
     def langid(self) -> tuple[str, float]:
         from langid.langid import LanguageIdentifier, model
         identifier = LanguageIdentifier.from_modelstring(string=model, norm_probs=True)
@@ -32,6 +32,10 @@ class Detect():
         detector: LanguageDetector = LanguageDetectorBuilder.from_all_languages().with_preloaded_language_models().build()
         confidence_values: List[ConfidenceValue] = detector.compute_language_confidence_values(self.text)
         return [confidence_values[0].language.iso_code_639_1.name.lower(), "{0:.2f}".format(confidence_values[0].value * 100)]
 def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, str]:
     """
@@ -51,16 +55,16 @@ def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, st
     """
     detectinstance = Detect(input_text)
     detections = []
     if 'langdetect' in used_libraries:
-        langcode, confidence_score = detectinstance.langdetect()
-        listtoappend = [langcode, confidence_score]
-        detections.append(listtoappend)
     if 'langid' in used_libraries:
-        listtoappend = detectinstance.langid()
-        detections.append(listtoappend)
     if 'lingua-py' in used_libraries:
-        listtoappend = detectinstance.lingua()
-        detections.append(listtoappend)
     print(detections)
     return detections
@@ -76,7 +80,7 @@ with gr.Blocks() as interface:
             type='array',
             row_count=len(libraries),
             column_count=2,
-            column_limits=(2, 3),
             label='Language detection dataframe'
         )
     input_text.submit(

 iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
 DEFAULTS = None
+libraries = ["langdetect", "langid", "lingua-py", "fasttlextangdetect"]
 class Detect():
     def __init__(self, text: str) -> None:
         DetectorFactory.seed = 0
         langcode = detect(self.text)
         langecode_probabilities: list[Language] = detect_langs(self.text)
+        return [langcode, round(number=langecode_probabilities[0].prob * 100, ndigits=2)]
     def langid(self) -> tuple[str, float]:
         from langid.langid import LanguageIdentifier, model
         identifier = LanguageIdentifier.from_modelstring(string=model, norm_probs=True)
         detector: LanguageDetector = LanguageDetectorBuilder.from_all_languages().with_preloaded_language_models().build()
         confidence_values: List[ConfidenceValue] = detector.compute_language_confidence_values(self.text)
         return [confidence_values[0].language.iso_code_639_1.name.lower(), "{0:.2f}".format(confidence_values[0].value * 100)]
+    def fasttlextangdetect(self) -> tuple[str, float]:
+        from ftlangdetect import detect
+        result = detect(text=self.text, low_memory=False)
+        return [result.get('lang'), result.get('score')]
 def detect_language(input_text: str, used_libraries: list[str]) -> list[list]:
     """
     Run every selected detection library on the input text.

     Parameters:
         input_text: Text whose language should be identified.
         used_libraries: Names of the libraries to run. Recognised names are
             'langdetect', 'langid', 'lingua-py' and 'fasttlextangdetect';
             any other name is ignored.

     Returns:
         A list with one row per selected library, always in the order listed
         above; each row is whatever the matching ``Detect`` method returns.
         When the text or the selection is empty, a single row holding a hint
         for the user is returned instead.
     """
     # Validate first so an empty request never constructs a detector.
     if not input_text or not used_libraries:
         return [[
             'No input text or library selected',
             'Please provide input text and/or select a detection library',
         ]]
     detectinstance = Detect(input_text)
     # (library name, bound method) pairs; their order fixes the row order.
     detectors = (
         ('langdetect', detectinstance.langdetect),
         ('langid', detectinstance.langid),
         ('lingua-py', detectinstance.lingua),
         ('fasttlextangdetect', detectinstance.fasttlextangdetect),
     )
     detections = [run() for name, run in detectors if name in used_libraries]
     print(detections)
     return detections
             type='array',
             row_count=len(libraries),
             column_count=2,
+            column_limits=(2, 4),
             label='Language detection dataframe'
         )
     input_text.submit(