TiberiuCristianLeon committed on
Commit
2d2ddee
·
verified ·
1 Parent(s): c405a51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -10
app.py CHANGED
@@ -10,7 +10,7 @@ all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Rom
10
  iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
11
  DEFAULTS = None
12
 
13
- libraries = ["langdetect", "langid"]
14
 
15
  class Detect():
16
  def __init__(self, text: str) -> None:
@@ -22,12 +22,16 @@ class Detect():
22
  langcode = detect(self.text)
23
  langecode_probabilities: list[Language] = detect_langs(self.text)
24
  return langcode, round(number=langecode_probabilities[0].prob * 100, ndigits=2)
25
-
26
  def langid(self) -> tuple[str, float]:
27
- import langid
28
- result: tuple[str, float] = langid.classify(self.text)
29
- langcode, langecode_probabilities = result
30
- return langcode, abs(round(number=langecode_probabilities * 10, ndigits=2))
 
 
 
 
 
31
 
32
  def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, str]:
33
  """
@@ -52,10 +56,10 @@ def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, st
52
  listtoappend = [langcode, confidence_score]
53
  detections.append(listtoappend)
54
  if 'langid' in used_libraries:
55
- from langid.langid import LanguageIdentifier, model
56
- identifier = LanguageIdentifier.from_modelstring(string=model, norm_probs=True)
57
- idresult: list[str, float] = list(identifier.classify(input_text))
58
- listtoappend = [idresult[0], abs(round(number=idresult[1] * 100, ndigits=2))]
59
  detections.append(listtoappend)
60
  print(detections)
61
  return detections
 
10
  iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
11
  DEFAULTS = None
12
 
13
+ libraries = ["langdetect", "langid", "lingua-py"]
14
 
15
  class Detect():
16
  def __init__(self, text: str) -> None:
 
22
  langcode = detect(self.text)
23
  langecode_probabilities: list[Language] = detect_langs(self.text)
24
  return langcode, round(number=langecode_probabilities[0].prob * 100, ndigits=2)
 
25
  def langid(self) -> tuple[str, float]:
26
+ from langid.langid import LanguageIdentifier, model
27
+ identifier = LanguageIdentifier.from_modelstring(string=model, norm_probs=True)
28
+ idresult: list[str, float] = list(identifier.classify(self.text))
29
+ return [idresult[0], abs(round(number=idresult[1] * 100, ndigits=2))]
30
+ def lingua(self) -> tuple[str, float]:
31
+ from lingua import Language, LanguageDetectorBuilder
32
+ detector: LanguageDetector = LanguageDetectorBuilder.from_all_languages().with_preloaded_language_models().build()
33
+ confidence_values: List[ConfidenceValue] = detector.compute_language_confidence_values(self.text)
34
+ return [confidence_values[0].language.iso_code_639_1.name.lower(), "{0:.2f}".format(confidence_values[0].value * 100)]
35
 
36
  def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, str]:
37
  """
 
56
  listtoappend = [langcode, confidence_score]
57
  detections.append(listtoappend)
58
  if 'langid' in used_libraries:
59
+ listtoappend = detectinstance.langid()
60
+ detections.append(listtoappend)
61
+ if 'lingua-py' in used_libraries:
62
+ listtoappend = detectinstance.lingua()
63
  detections.append(listtoappend)
64
  print(detections)
65
  return detections