TiberiuCristianLeon committed on
Commit
6f0b2fb
·
verified ·
1 Parent(s): 5a601ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -10
app.py CHANGED
@@ -10,7 +10,7 @@ all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Rom
10
  iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
11
  DEFAULTS = None
12
 
13
- libraries = ["langdetect", "langid", "lingua-py"]
14
 
15
  class Detect():
16
  def __init__(self, text: str) -> None:
@@ -21,7 +21,7 @@ class Detect():
21
  DetectorFactory.seed = 0
22
  langcode = detect(self.text)
23
  langecode_probabilities: list[Language] = detect_langs(self.text)
24
- return langcode, round(number=langecode_probabilities[0].prob * 100, ndigits=2)
25
  def langid(self) -> tuple[str, float]:
26
  from langid.langid import LanguageIdentifier, model
27
  identifier = LanguageIdentifier.from_modelstring(string=model, norm_probs=True)
@@ -32,6 +32,10 @@ class Detect():
32
  detector: LanguageDetector = LanguageDetectorBuilder.from_all_languages().with_preloaded_language_models().build()
33
  confidence_values: List[ConfidenceValue] = detector.compute_language_confidence_values(self.text)
34
  return [confidence_values[0].language.iso_code_639_1.name.lower(), "{0:.2f}".format(confidence_values[0].value * 100)]
 
 
 
 
35
 
36
  def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, str]:
37
  """
@@ -51,16 +55,16 @@ def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, st
51
  """
52
  detectinstance = Detect(input_text)
53
  detections = []
 
 
54
  if 'langdetect' in used_libraries:
55
- langcode, confidence_score = detectinstance.langdetect()
56
- listtoappend = [langcode, confidence_score]
57
- detections.append(listtoappend)
58
  if 'langid' in used_libraries:
59
- listtoappend = detectinstance.langid()
60
- detections.append(listtoappend)
61
  if 'lingua-py' in used_libraries:
62
- listtoappend = detectinstance.lingua()
63
- detections.append(listtoappend)
 
64
  print(detections)
65
  return detections
66
 
@@ -76,7 +80,7 @@ with gr.Blocks() as interface:
76
  type='array',
77
  row_count=len(libraries),
78
  column_count=2,
79
- column_limits=(2, 3),
80
  label='Language detection dataframe'
81
  )
82
  input_text.submit(
 
10
  iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
11
  DEFAULTS = None
12
 
13
+ libraries = ["langdetect", "langid", "lingua-py", "fasttlextangdetect"]
14
 
15
  class Detect():
16
  def __init__(self, text: str) -> None:
 
21
  DetectorFactory.seed = 0
22
  langcode = detect(self.text)
23
  langecode_probabilities: list[Language] = detect_langs(self.text)
24
+ return [langcode, round(number=langecode_probabilities[0].prob * 100, ndigits=2)]
25
  def langid(self) -> tuple[str, float]:
26
  from langid.langid import LanguageIdentifier, model
27
  identifier = LanguageIdentifier.from_modelstring(string=model, norm_probs=True)
 
32
  detector: LanguageDetector = LanguageDetectorBuilder.from_all_languages().with_preloaded_language_models().build()
33
  confidence_values: List[ConfidenceValue] = detector.compute_language_confidence_values(self.text)
34
  return [confidence_values[0].language.iso_code_639_1.name.lower(), "{0:.2f}".format(confidence_values[0].value * 100)]
35
+ def fasttlextangdetect(self) -> tuple[str, float]:
36
+ from ftlangdetect import detect
37
+ result = detect(text=self.text, low_memory=False)
38
+ return [result.get('lang'), result.get('score')]
39
 
40
  def detect_language(input_text: str, used_libraries: list[str]) -> tuple[str, str]:
41
  """
 
55
  """
56
  detectinstance = Detect(input_text)
57
  detections = []
58
+ if not input_text or not used_libraries:
59
+ return [['No input text or library selected', 'Please provide input text and/or select a detection library']]
60
  if 'langdetect' in used_libraries:
61
+ detections.append(detectinstance.langdetect())
 
 
62
  if 'langid' in used_libraries:
63
+ detections.append(detectinstance.langid())
 
64
  if 'lingua-py' in used_libraries:
65
+ detections.append(detectinstance.lingua())
66
+ if 'fasttlextangdetect' in used_libraries:
67
+ detections.append(detectinstance.fasttlextangdetect())
68
  print(detections)
69
  return detections
70
 
 
80
  type='array',
81
  row_count=len(libraries),
82
  column_count=2,
83
+ column_limits=(2, 4),
84
  label='Language detection dataframe'
85
  )
86
  input_text.submit(