Pygmales
updated project state
268baab
raw
history blame
951 Bytes
from langdetect import DetectorFactory, detect_langs
from src.utils.logging import get_logger
from config import LANG_AMBIGUITY_THRESHOLD
# Module-level logger shared by the language helpers in this file.
logger = get_logger('lang_utils')
# Pin langdetect's seed so that repeated detection of the same text yields the
# same result (per the langdetect docs the algorithm is otherwise non-deterministic).
DetectorFactory.seed = 0
def detect_language(text: str) -> str:
    """
    Detects if the provided text is written in German or in some other language.
    In case of ambiguous or undetectable input returns 'en'.

    Args:
        text (str): The text to analyze.

    Returns:
        str: 'de' if German is the top-ranked language and its detection
            certainty is at least LANG_AMBIGUITY_THRESHOLD, else 'en'.
    """
    # Imported locally so this function stays self-contained; the module is
    # part of the langdetect package the file already depends on.
    from langdetect.lang_detect_exception import LangDetectException

    try:
        found_langs = detect_langs(text)
    except LangDetectException as exc:
        # langdetect raises when the text offers nothing to classify (e.g. an
        # empty string or digits/punctuation only). Treat that as ambiguous
        # input and use the documented fallback instead of crashing.
        logger.info(f'Could not detect the language of the text, falling back to en: {exc}')
        return 'en'

    # Defensive guard: without any candidate there is no top language to inspect.
    if not found_langs:
        return 'en'

    top_lang = found_langs[0]
    logger.info(f'Found following languages in the text: {", ".join(f"{lang.lang}-{lang.prob:1.2f}" for lang in found_langs)}')

    # Only a confident German detection counts as 'de'; everything else is 'en'.
    if top_lang.lang == 'de' and top_lang.prob >= LANG_AMBIGUITY_THRESHOLD:
        return 'de'
    return 'en'
def get_language_name(code: str):
    """
    Maps a language code to its human-readable language name.

    Args:
        code (str): The language code, e.g. 'en' or 'de'.

    Returns:
        str: "German" for 'de', "British English" for 'en' and for any
            code that is not in the table.
    """
    names_by_code = {
        'de': "German",
        'en': "British English",
    }
    try:
        return names_by_code[code]
    except KeyError:
        # Unknown codes fall back to the default language name.
        return 'British English'