Rulga committed on
Commit
bfaabcf
·
1 Parent(s): d9488b7

Refactor language detection to use a consistent DetectorFactory instance for improved accuracy

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -331,17 +331,17 @@ def detect_language(text: str) -> str:
331
  # First detection with langdetect
332
  from langdetect import detect, LangDetectException, DetectorFactory
333
 
 
 
 
 
 
334
  try:
335
- # Initialize DetectorFactory once
336
- DetectorFactory.seed = 0 # For consistent results
337
-
338
- lang_code = detect(text.strip())
339
  logger.debug(f"Detected language: {lang_code}")
340
 
341
- # Verify detection with confidence check by analyzing a larger portion of text
342
  if len(text) > 50:
343
- detector = DetectorFactory().create() # Правильное создание детектора
344
- detector.append(text)
345
  lang_probabilities = detector.get_probabilities()
346
 
347
  # If top language has low probability, fallback to English
 
331
  # First detection with langdetect
332
  from langdetect import detect, LangDetectException, DetectorFactory
333
 
334
+ # Initialize DetectorFactory properly
335
+ DetectorFactory.seed = 0 # For consistent results
336
+ detector = DetectorFactory.create()
337
+ detector.append(text.strip())
338
+
339
  try:
340
+ lang_code = detector.detect()
 
 
 
341
  logger.debug(f"Detected language: {lang_code}")
342
 
343
+ # Verify detection with confidence check
344
  if len(text) > 50:
 
 
345
  lang_probabilities = detector.get_probabilities()
346
 
347
  # If top language has low probability, fallback to English