"""Language detection and translation layer.

Uses langdetect for detection and Qwen 2.5 via Ollama for translation.
"""

import litellm
from langdetect import detect, LangDetectException

from src.config import MODEL
from src.usage import _extract_usage, _empty_usage

# Languages we explicitly support (from the brief)
SUPPORTED_LANGUAGES = {
    "en": "English",
    "mi": "te reo Māori",
    "tl": "Filipino",
    "hi": "Hindi",
    "sm": "Samoan",
    "zh-cn": "Mandarin Chinese",
    "zh-tw": "Cantonese/Traditional Chinese",
}


def detect_language(text: str) -> str:
    """Detect the language of the input text.

    Returns a language code (e.g. 'en', 'tl', 'hi', 'zh-cn').
    Falls back to 'en' if detection fails (empty or ambiguous input).
    """
    try:
        # langdetect returns 'zh-cn' for Chinese, 'tl' for Filipino, etc.
        return detect(text)
    except LangDetectException:
        return "en"


def get_language_name(lang_code: str) -> str:
    """Return the human-readable name for *lang_code*.

    Unknown codes are returned unchanged so callers always get a string.
    """
    return SUPPORTED_LANGUAGES.get(lang_code, lang_code)


def translate_to_english(text: str, source_lang: str) -> tuple[str, dict]:
    """Translate a query from the source language to English.

    Returns a ``(translated_text, usage)`` tuple. When the source is
    already English, or when the translation call fails, the original
    text is returned along with an empty usage record.
    """
    if source_lang == "en":
        return text, _empty_usage()

    lang_name = get_language_name(source_lang)
    prompt = f"""Translate the following text from {lang_name} to English.
Return ONLY the English translation, nothing else.

Text: {text}"""

    try:
        response = litellm.completion(
            model=MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0,  # deterministic output for translation
            max_tokens=500,
        )
        usage = _extract_usage(response)
        # Guard against a None content field before stripping whitespace.
        return (response.choices[0].message.content or "").strip(), usage
    except Exception as e:
        # Best-effort: a failed translation must not break the pipeline.
        print(f"Translation error: {e}")
        return text, _empty_usage()  # Fall back to original