Spaces:
Running
Refactor language detection to use a consistent DetectorFactory instance for improved accuracy
Browse files
app.py
CHANGED
|
@@ -331,17 +331,17 @@ def detect_language(text: str) -> str:
|
|
| 331 |
# First detection with langdetect
|
| 332 |
from langdetect import detect, LangDetectException, DetectorFactory
|
| 333 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
try:
|
| 335 |
-
|
| 336 |
-
DetectorFactory.seed = 0 # For consistent results
|
| 337 |
-
|
| 338 |
-
lang_code = detect(text.strip())
|
| 339 |
logger.debug(f"Detected language: {lang_code}")
|
| 340 |
|
| 341 |
-
# Verify detection with confidence check
|
| 342 |
if len(text) > 50:
|
| 343 |
-
detector = DetectorFactory().create() # Правильное создание детектора
|
| 344 |
-
detector.append(text)
|
| 345 |
lang_probabilities = detector.get_probabilities()
|
| 346 |
|
| 347 |
# If top language has low probability, fallback to English
|
|
|
|
| 331 |
# First detection with langdetect
|
| 332 |
from langdetect import detect, LangDetectException, DetectorFactory
|
| 333 |
|
| 334 |
+
# Initialize DetectorFactory properly
|
| 335 |
+
DetectorFactory.seed = 0 # For consistent results
|
| 336 |
+
detector = DetectorFactory.create()
|
| 337 |
+
detector.append(text.strip())
|
| 338 |
+
|
| 339 |
try:
|
| 340 |
+
lang_code = detector.detect()
|
|
|
|
|
|
|
|
|
|
| 341 |
logger.debug(f"Detected language: {lang_code}")
|
| 342 |
|
| 343 |
+
# Verify detection with confidence check
|
| 344 |
if len(text) > 50:
|
|
|
|
|
|
|
| 345 |
lang_probabilities = detector.get_probabilities()
|
| 346 |
|
| 347 |
# If top language has low probability, fallback to English
|