"""Language detection with langdetect primary and cld3 fallback.""" from __future__ import annotations from app.core.logging import get_logger from app.models.schemas import LanguageResult logger = get_logger(__name__) def detect_language(text: str) -> LanguageResult: """Detect language using langdetect with cld3 fallback.""" if not text or len(text.strip()) < 3: return LanguageResult(language="unknown", confidence=0.0, method="none") # Primary: langdetect try: from langdetect import DetectorFactory, detect_langs DetectorFactory.seed = 42 results = detect_langs(text) if results: top = results[0] return LanguageResult( language=str(top.lang), confidence=round(top.prob, 4), method="langdetect", ) except Exception as exc: logger.debug("langdetect_failed", error=str(exc)) # Fallback: cld3 try: import cld3 result = cld3.get_language(text) if result and result.is_reliable: return LanguageResult( language=result.language, confidence=round(result.probability, 4), method="cld3", ) elif result: return LanguageResult( language=result.language, confidence=round(result.probability, 4), method="cld3", ) except ImportError: logger.warning("cld3_not_available", detail="Install pycld3 for fallback detection") except Exception as exc: logger.debug("cld3_failed", error=str(exc)) return LanguageResult(language="unknown", confidence=0.0, method="none") def detect_languages_batch(texts: list[str]) -> list[LanguageResult]: return [detect_language(t) for t in texts]