Spaces:
Sleeping
Sleeping
File size: 951 Bytes
cdf68de 0a372e8 cdf68de 0a372e8 cdf68de 0a372e8 cdf68de 268baab cdf68de 0a372e8 9dcf6e4 268baab 9dcf6e4 268baab | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | from langdetect import DetectorFactory, detect_langs
from src.utils.logging import get_logger
from config import LANG_AMBIGUITY_THRESHOLD
logger = get_logger('lang_utils')
DetectorFactory.seed = 0
def detect_language(text: str) -> str:
    """
    Decide whether the provided text is German or should be treated as English.

    The text is handed to langdetect's ``detect_langs`` and only the first
    candidate it returns is considered. Everything that is not a sufficiently
    confident German result is reported as 'en', including input on which
    detection cannot run at all (empty or whitespace-only text, or text
    without any detectable language features).

    Args:
        text (str): The text to analyze.

    Returns:
        str: 'de' if the top candidate is German with a probability of at
            least LANG_AMBIGUITY_THRESHOLD, else 'en'.
    """
    # Nothing to analyze: langdetect raises on blank input, so fall back to
    # the documented default before calling it.
    if not text or not text.strip():
        return 'en'

    # Imported locally so this block does not depend on the module-level
    # import list; langdetect is already a dependency of this module.
    from langdetect import LangDetectException

    try:
        found_langs = detect_langs(text)
    except LangDetectException:
        # Raised e.g. for text made only of digits or punctuation.
        logger.info('Language detection failed, falling back to "en"')
        return 'en'

    # Defensive: avoid an IndexError if no candidate was returned.
    if not found_langs:
        return 'en'

    top_lang = found_langs[0]
    summary = ", ".join(f"{lang.lang}-{lang.prob:1.2f}" for lang in found_langs)
    logger.info(f'Found following languages in the text: {summary}')

    is_confident_german = (
        top_lang.lang == 'de' and top_lang.prob >= LANG_AMBIGUITY_THRESHOLD
    )
    return 'de' if is_confident_german else 'en'
def get_language_name(code: str) -> str:
    """
    Translate a language code into a human-readable language name.

    Args:
        code (str): Language code, e.g. 'en' or 'de'.

    Returns:
        str: "German" for 'de'; "British English" for 'en' and for any
            code that is not in the table.
    """
    names_by_code = {
        'en': "British English",
        'de': "German",
    }
    try:
        return names_by_code[code]
    except KeyError:
        # Unknown codes fall back to the default language name.
        return 'British English'
|