Update app.py
Browse files
app.py
CHANGED
|
@@ -15,28 +15,33 @@ libraries = ["langdetect", "langid", "lingua-py", "fasttlextangdetect"]
|
|
| 15 |
class Detect():
|
| 16 |
def __init__(self, text: str) -> None:
|
| 17 |
self.text: str = text
|
| 18 |
-
def langdetect(self) ->
|
| 19 |
from langdetect import detect, detect_langs
|
| 20 |
from langdetect import DetectorFactory
|
| 21 |
DetectorFactory.seed = 0
|
| 22 |
langcode = detect(self.text)
|
| 23 |
langecode_probabilities: list[Language] = detect_langs(self.text)
|
| 24 |
return [langcode, round(number=langecode_probabilities[0].prob * 100, ndigits=2)]
|
| 25 |
-
def langid(self) ->
|
| 26 |
from langid.langid import LanguageIdentifier, model
|
| 27 |
identifier = LanguageIdentifier.from_modelstring(string=model, norm_probs=True)
|
| 28 |
idresult: list[str, float] = list(identifier.classify(self.text))
|
| 29 |
return [idresult[0], abs(round(number=idresult[1] * 100, ndigits=2))]
|
| 30 |
-
def lingua(self) ->
|
| 31 |
from lingua import Language, LanguageDetectorBuilder
|
| 32 |
detector: LanguageDetector = LanguageDetectorBuilder.from_all_languages().with_preloaded_language_models().build()
|
| 33 |
confidence_values: List[ConfidenceValue] = detector.compute_language_confidence_values(self.text)
|
| 34 |
return [confidence_values[0].language.iso_code_639_1.name.lower(), "{0:.2f}".format(confidence_values[0].value * 100)]
|
| 35 |
-
def fasttlextangdetect(self) ->
|
| 36 |
from ftlangdetect import detect
|
| 37 |
result = detect(text=self.text, low_memory=False)
|
| 38 |
-
return [result.get('lang'), result.get('score')]
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, str]:
|
| 41 |
"""
|
| 42 |
Detects the language of the input text.
|
|
@@ -64,7 +69,7 @@ def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, st
|
|
| 64 |
if 'lingua-py' in used_libraries:
|
| 65 |
detections.append(detectinstance.lingua())
|
| 66 |
if 'fasttlextangdetect' in used_libraries:
|
| 67 |
-
detections.append(detectinstance.
|
| 68 |
print(detections)
|
| 69 |
return detections
|
| 70 |
|
|
|
|
| 15 |
class Detect():
    """Run one of several language-identification libraries on a text.

    Every detector method returns a two-item list
    ``[language_code, confidence]`` where ``confidence`` is a percentage
    rounded to two decimals. Each library is imported inside the method
    that uses it, so a library is only loaded when its detector is called.
    """

    def __init__(self, text: str) -> None:
        """Store the text that the detector methods will classify."""
        self.text: str = text

    @staticmethod
    def _as_percent(score: float) -> float:
        """Convert a 0..1 score into a non-negative percentage with 2 decimals."""
        return abs(round(score * 100, 2))

    @staticmethod
    def _best_candidate(result):
        """Return the top candidate from a detector result.

        Accepts either a single ``{'lang': ..., 'score': ...}`` dict or a
        list of such dicts (best first), so both return shapes are handled.
        """
        return result[0] if isinstance(result, list) else result

    def langdetect(self) -> "list[str | float]":
        """Detect the language with ``langdetect``.

        Returns:
            ``[language_code, confidence_percent]``; ``["unknown", 0.0]``
            when the library reports no candidate at all.
        """
        from langdetect import DetectorFactory, detect_langs

        # A fixed seed makes langdetect's randomised algorithm deterministic.
        DetectorFactory.seed = 0
        # detect_langs() already yields the best candidate first, so a
        # separate detect() call would only repeat the same work.
        candidates = detect_langs(self.text)
        if not candidates:
            # NOTE(review): mirrors the "unknown" code langdetect.detect()
            # uses when nothing passes its threshold - confirm.
            return ["unknown", 0.0]
        best = candidates[0]
        return [best.lang, self._as_percent(best.prob)]

    def langid(self) -> "list[str | float]":
        """Detect the language with ``langid``, using normalised probabilities.

        Returns:
            ``[language_code, confidence_percent]``.
        """
        from langid.langid import LanguageIdentifier, model

        identifier = LanguageIdentifier.from_modelstring(string=model, norm_probs=True)
        langcode, probability = identifier.classify(self.text)
        return [langcode, self._as_percent(probability)]

    def lingua(self) -> "list[str | float]":
        """Detect the language with ``lingua`` across all supported languages.

        Returns:
            ``[iso_639_1_code_lowercase, confidence_percent]``. The confidence
            is a float, matching the other detectors.
        """
        from lingua import LanguageDetectorBuilder

        detector = LanguageDetectorBuilder.from_all_languages().with_preloaded_language_models().build()
        confidence_values = detector.compute_language_confidence_values(self.text)
        best = confidence_values[0]
        return [best.language.iso_code_639_1.name.lower(), self._as_percent(best.value)]

    def fasttlextangdetect(self) -> "list[str | float]":
        """Detect the language with ``ftlangdetect`` (full-size model).

        Returns:
            ``[language_code, confidence_percent]``.
        """
        from ftlangdetect import detect

        result = detect(text=self.text, low_memory=False)
        return [result.get('lang'), self._as_percent(result.get('score'))]

    def fasttangdetect(self) -> "list[str | float]":
        """Detect the language with ``fast_langdetect``.

        Returns:
            ``[language_code, confidence_percent]`` for the top candidate.
        """
        from fast_langdetect import detect

        # With the ``model``/``k`` keyword API the library returns a list of
        # candidate dicts rather than one dict, so unwrap the first entry.
        best = self._best_candidate(detect(text=self.text, model="auto", k=1))
        return [best.get('lang'), self._as_percent(best.get('score'))]
|
| 44 |
+
|
| 45 |
def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, str]:
|
| 46 |
"""
|
| 47 |
Detects the language of the input text.
|
|
|
|
| 69 |
if 'lingua-py' in used_libraries:
|
| 70 |
detections.append(detectinstance.lingua())
|
| 71 |
if 'fasttlextangdetect' in used_libraries:
|
| 72 |
+
detections.append(detectinstance.fasttangdetect())
|
| 73 |
print(detections)
|
| 74 |
return detections
|
| 75 |
|