Update app.py
Browse files
app.py
CHANGED
|
@@ -10,7 +10,7 @@ all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Rom
|
|
| 10 |
iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
|
| 11 |
DEFAULTS = None
|
| 12 |
|
| 13 |
-
libraries = ["langdetect", "langid"]
|
| 14 |
|
| 15 |
class Detect():
|
| 16 |
def __init__(self, text: str) -> None:
|
|
@@ -22,12 +22,16 @@ class Detect():
|
|
| 22 |
langcode = detect(self.text)
|
| 23 |
langecode_probabilities: list[Language] = detect_langs(self.text)
|
| 24 |
return langcode, round(number=langecode_probabilities[0].prob * 100, ndigits=2)
|
| 25 |
-
|
| 26 |
def langid(self) -> tuple[str, float]:
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, str]:
|
| 33 |
"""
|
|
@@ -52,10 +56,10 @@ def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, st
|
|
| 52 |
listtoappend = [langcode, confidence_score]
|
| 53 |
detections.append(listtoappend)
|
| 54 |
if 'langid' in used_libraries:
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
listtoappend =
|
| 59 |
detections.append(listtoappend)
|
| 60 |
print(detections)
|
| 61 |
return detections
|
|
|
|
| 10 |
iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
|
| 11 |
DEFAULTS = None
|
| 12 |
|
| 13 |
+
libraries = ["langdetect", "langid", "lingua-py"]
|
| 14 |
|
| 15 |
class Detect():
|
| 16 |
def __init__(self, text: str) -> None:
|
|
|
|
| 22 |
langcode = detect(self.text)
|
| 23 |
langecode_probabilities: list[Language] = detect_langs(self.text)
|
| 24 |
return langcode, round(number=langecode_probabilities[0].prob * 100, ndigits=2)
|
|
|
|
| 25 |
def langid(self) -> tuple[str, float]:
    """Detect the language of ``self.text`` with the ``langid`` library.

    Returns:
        A two-item list ``[language_code, confidence]``. The confidence is
        the score reported by ``classify`` multiplied by 100 and rounded to
        two decimals.
    """
    # Imported lazily so the dependency is only needed when this detector
    # is actually selected.
    from langid.langid import LanguageIdentifier, model

    # norm_probs=True requests a normalized probability rather than a raw
    # score (see the langid documentation).
    classifier = LanguageIdentifier.from_modelstring(string=model, norm_probs=True)
    code, score = identifier_result = tuple(classifier.classify(self.text))[:2]
    percentage = round(number=score * 100, ndigits=2)
    return [code, abs(percentage)]
|
| 30 |
+
def lingua(self) -> tuple[str, float]:
    """Detect the language of ``self.text`` with the ``lingua`` library.

    Returns:
        A two-item list ``[language_code, confidence]``. The code is the
        lower-cased ISO 639-1 name of the first confidence entry, and the
        confidence is that entry's value as a percentage rounded to two
        decimals. It is returned as a float, matching the declared return
        type and the other detector methods, rather than as a
        pre-formatted string.
    """
    # Imported lazily so the dependency is only needed when this detector
    # is actually selected.
    from lingua import LanguageDetectorBuilder

    detector = (
        LanguageDetectorBuilder.from_all_languages()
        .with_preloaded_language_models()
        .build()
    )
    confidence_values = detector.compute_language_confidence_values(self.text)

    # The first entry is taken as the best match.
    # NOTE(review): relies on lingua returning the values sorted by
    # descending confidence -- confirm against the library documentation.
    best = confidence_values[0]
    return [best.language.iso_code_639_1.name.lower(), round(best.value * 100, 2)]
|
| 35 |
|
| 36 |
def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, str]:
|
| 37 |
"""
|
|
|
|
| 56 |
listtoappend = [langcode, confidence_score]
|
| 57 |
detections.append(listtoappend)
|
| 58 |
if 'langid' in used_libraries:
|
| 59 |
+
listtoappend = detectinstance.langid()
|
| 60 |
+
detections.append(listtoappend)
|
| 61 |
+
if 'lingua-py' in used_libraries:
|
| 62 |
+
listtoappend = detectinstance.lingua()
|
| 63 |
detections.append(listtoappend)
|
| 64 |
print(detections)
|
| 65 |
return detections
|