Spaces:
Sleeping
Sleeping
| from langdetect import DetectorFactory, detect_langs | |
| from src.utils.logging import get_logger | |
| from config import LANG_AMBIGUITY_THRESHOLD | |
# Logger for this module, obtained from the project's get_logger helper under the name 'lang_utils'.
| logger = get_logger('lang_utils') | |
# Pin langdetect's seed once at import time.
# NOTE(review): per the langdetect README, detection is non-deterministic unless the seed is fixed,
# so this presumably exists to make results reproducible between runs — confirm against the installed version.
| DetectorFactory.seed = 0 | |
def detect_language(text: str) -> str:
    """
    Decide whether the provided text is written in German or in some other language.

    All candidate languages reported by langdetect are logged together with
    their probabilities; only the first (top) candidate drives the decision.

    Args:
        text (str): The text to analyze.

    Returns:
        str: 'de' if the top candidate is German and its probability is at
            least LANG_AMBIGUITY_THRESHOLD, else 'en'. Input for which no
            language can be determined (langdetect raises or reports no
            candidates) is treated as ambiguous and also yields 'en'.
    """
    # Imported locally so this fallback does not rely on the module's import block.
    from langdetect.lang_detect_exception import LangDetectException

    try:
        found_langs = detect_langs(text)
    except LangDetectException as err:
        # langdetect raises instead of guessing when the text carries no
        # usable features (e.g. empty, digits or punctuation only).
        logger.warning(f'Language detection failed, falling back to en: {err}')
        return 'en'

    if not found_langs:
        # No candidate at all: nothing to index into, so treat as ambiguous.
        logger.warning('No languages found in the text, falling back to en')
        return 'en'

    top_lang = found_langs[0]
    logger.info(f'Found following languages in the text: {", ".join(f"{lang.lang}-{lang.prob:1.2f}" for lang in found_langs)}')
    return 'de' if top_lang.lang == 'de' and top_lang.prob >= LANG_AMBIGUITY_THRESHOLD else 'en'
def get_language_name(code: str):
    """
    Map a language code to its human-readable language name.

    Args:
        code (str): Language code, e.g. 'en' or 'de'.

    Returns:
        str: "German" for 'de'; "British English" for 'en' and for any
            code that is not in the table.
    """
    known_names = {
        'de': "German",
        'en': "British English",
    }
    try:
        return known_names[code]
    except KeyError:
        # Unknown codes share the English default.
        return 'British English'