Update app.py
Browse files
app.py
CHANGED
|
@@ -10,7 +10,7 @@ all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Rom
|
|
| 10 |
iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
|
| 11 |
DEFAULTS = None
|
| 12 |
|
| 13 |
-
libraries = ["langdetect", "langid", "lingua-py"]
|
| 14 |
|
| 15 |
class Detect():
|
| 16 |
def __init__(self, text: str) -> None:
|
|
@@ -21,7 +21,7 @@ class Detect():
|
|
| 21 |
DetectorFactory.seed = 0
|
| 22 |
langcode = detect(self.text)
|
| 23 |
langecode_probabilities: list[Language] = detect_langs(self.text)
|
| 24 |
-
return langcode, round(number=langecode_probabilities[0].prob * 100, ndigits=2)
|
| 25 |
def langid(self) -> tuple[str, float]:
|
| 26 |
from langid.langid import LanguageIdentifier, model
|
| 27 |
identifier = LanguageIdentifier.from_modelstring(string=model, norm_probs=True)
|
|
@@ -32,6 +32,10 @@ class Detect():
|
|
| 32 |
detector: LanguageDetector = LanguageDetectorBuilder.from_all_languages().with_preloaded_language_models().build()
|
| 33 |
confidence_values: List[ConfidenceValue] = detector.compute_language_confidence_values(self.text)
|
| 34 |
return [confidence_values[0].language.iso_code_639_1.name.lower(), "{0:.2f}".format(confidence_values[0].value * 100)]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, str]:
|
| 37 |
"""
|
|
@@ -51,16 +55,16 @@ def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, st
|
|
| 51 |
"""
|
| 52 |
detectinstance = Detect(input_text)
|
| 53 |
detections = []
|
|
|
|
|
|
|
| 54 |
if 'langdetect' in used_libraries:
|
| 55 |
-
|
| 56 |
-
listtoappend = [langcode, confidence_score]
|
| 57 |
-
detections.append(listtoappend)
|
| 58 |
if 'langid' in used_libraries:
|
| 59 |
-
|
| 60 |
-
detections.append(listtoappend)
|
| 61 |
if 'lingua-py' in used_libraries:
|
| 62 |
-
|
| 63 |
-
|
|
|
|
| 64 |
print(detections)
|
| 65 |
return detections
|
| 66 |
|
|
@@ -76,7 +80,7 @@ with gr.Blocks() as interface:
|
|
| 76 |
type='array',
|
| 77 |
row_count=len(libraries),
|
| 78 |
column_count=2,
|
| 79 |
-
column_limits=(2,
|
| 80 |
label='Language detection dataframe'
|
| 81 |
)
|
| 82 |
input_text.submit(
|
|
|
|
| 10 |
iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
|
| 11 |
DEFAULTS = None
|
| 12 |
|
| 13 |
+
# Names of the supported detection back-ends. detect_language() checks each of
# these names against `used_libraries`, and len(libraries) sets the row count
# of the results dataframe.
libraries = ["langdetect", "langid", "lingua-py", "fasttlextangdetect"]
|
| 14 |
|
| 15 |
class Detect():
|
| 16 |
def __init__(self, text: str) -> None:
|
|
|
|
| 21 |
DetectorFactory.seed = 0
|
| 22 |
langcode = detect(self.text)
|
| 23 |
langecode_probabilities: list[Language] = detect_langs(self.text)
|
| 24 |
+
return [langcode, round(number=langecode_probabilities[0].prob * 100, ndigits=2)]
|
| 25 |
def langid(self) -> tuple[str, float]:
|
| 26 |
from langid.langid import LanguageIdentifier, model
|
| 27 |
identifier = LanguageIdentifier.from_modelstring(string=model, norm_probs=True)
|
|
|
|
| 32 |
detector: LanguageDetector = LanguageDetectorBuilder.from_all_languages().with_preloaded_language_models().build()
|
| 33 |
confidence_values: List[ConfidenceValue] = detector.compute_language_confidence_values(self.text)
|
| 34 |
return [confidence_values[0].language.iso_code_639_1.name.lower(), "{0:.2f}".format(confidence_values[0].value * 100)]
|
| 35 |
+
def fasttlextangdetect(self) -> list:
    """Detect the language of ``self.text`` with fasttext-langdetect.

    Returns:
        A two-item list ``[langcode, confidence]``. ``confidence`` is a
        percentage rounded to two decimals, the same scale the langdetect
        and lingua detectors in this class report, or ``None`` when the
        library result carries no score.
    """
    from ftlangdetect import detect

    # fastText predicts one line at a time and raises ValueError when the
    # text contains a newline, so collapse multi-line input first.
    single_line = " ".join(self.text.splitlines())
    result = detect(text=single_line, low_memory=False)

    # The library reports a 0-1 score; convert it to a percentage so every
    # row of the results table uses the same scale.
    score = result.get('score')
    confidence = None if score is None else round(score * 100, 2)
    return [result.get('lang'), confidence]
|
| 39 |
|
| 40 |
def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, str]:
|
| 41 |
"""
|
|
|
|
| 55 |
"""
|
| 56 |
detectinstance = Detect(input_text)
|
| 57 |
detections = []
|
| 58 |
+
if not input_text or not used_libraries:
|
| 59 |
+
return [['No input text or library selected', 'Please provide input text and/or select a detection library']]
|
| 60 |
if 'langdetect' in used_libraries:
|
| 61 |
+
detections.append(detectinstance.langdetect())
|
|
|
|
|
|
|
| 62 |
if 'langid' in used_libraries:
|
| 63 |
+
detections.append(detectinstance.langid())
|
|
|
|
| 64 |
if 'lingua-py' in used_libraries:
|
| 65 |
+
detections.append(detectinstance.lingua())
|
| 66 |
+
if 'fasttlextangdetect' in used_libraries:
|
| 67 |
+
detections.append(detectinstance.fasttlextangdetect())
|
| 68 |
print(detections)
|
| 69 |
return detections
|
| 70 |
|
|
|
|
| 80 |
type='array',
|
| 81 |
row_count=len(libraries),
|
| 82 |
column_count=2,
|
| 83 |
+
column_limits=(2, 4),
|
| 84 |
label='Language detection dataframe'
|
| 85 |
)
|
| 86 |
input_text.submit(
|