Spaces:
Running
Running
| """ | |
| Language detection and translation layer. | |
| Uses langdetect for detection and Qwen 2.5 via Ollama for translation. | |
| """ | |
| import litellm | |
| from langdetect import detect, LangDetectException | |
| from src.config import MODEL | |
| from src.usage import _extract_usage, _empty_usage | |
# Languages we explicitly support (from the brief).
# Maps a language code to the human-readable name that get_language_name()
# returns and that translate_to_english() interpolates into its prompt.
# A code that is missing from this table is passed through unchanged by
# get_language_name(), so it is not an error for a code to be absent here.
# NOTE(review): confirm the detector can actually emit "mi" and "sm"; if it
# cannot, these two entries are only reachable when a caller supplies the
# code explicitly.
SUPPORTED_LANGUAGES = {
    "en": "English",
    "mi": "te reo Māori",
    "tl": "Filipino",
    "hi": "Hindi",
    "sm": "Samoan",
    "zh-cn": "Mandarin Chinese",
    "zh-tw": "Cantonese/Traditional Chinese",
}
def detect_language(text: str) -> str:
    """Guess which language *text* is written in.

    Delegates to langdetect's ``detect`` and hands its result back
    untouched, so the value is whatever code that library reports
    (e.g. 'en', 'tl', 'hi', 'zh-cn').

    Returns 'en' when ``detect`` raises ``LangDetectException``.
    """
    try:
        detected_code = detect(text)
    except LangDetectException:
        # Detection failed: treat the input as English.
        return "en"
    else:
        return detected_code
def get_language_name(lang_code: str) -> str:
    """Return the display name for *lang_code*.

    Codes that are not listed in ``SUPPORTED_LANGUAGES`` are returned
    as-is, so the caller always gets something printable back.
    """
    try:
        return SUPPORTED_LANGUAGES[lang_code]
    except KeyError:
        # Unknown code: echo it back rather than failing.
        return lang_code
def translate_to_english(text: str, source_lang: str) -> tuple:
    """Translate a query from the source language to English.

    Args:
        text: The query to translate.
        source_lang: Language code of ``text`` (e.g. 'tl', 'hi', 'zh-cn').
            It is mapped to a display name with ``get_language_name`` before
            being placed in the prompt.

    Returns:
        A ``(translated_text, usage)`` pair. ``usage`` is whatever
        ``_extract_usage`` produces for the completion response, or
        ``_empty_usage()`` when no model call was made or the call failed.

        The original ``text`` is returned unchanged when ``source_lang`` is
        'en', when ``text`` is blank, when the model replies with nothing,
        or when the completion call raises.
    """
    # Nothing to translate: skip the model call for English or blank input.
    if source_lang == "en" or not (text or "").strip():
        return text, _empty_usage()

    lang_name = get_language_name(source_lang)
    prompt = f"""Translate the following text from {lang_name} to English.
Return ONLY the English translation, nothing else.
Text: {text}"""

    try:
        response = litellm.completion(
            model=MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            max_tokens=500,
        )
        usage = _extract_usage(response)
        # content may be None; normalise to a stripped string.
        translated = (response.choices[0].message.content or "").strip()
    except Exception as e:
        # Deliberately best-effort: a failed translation must not break the
        # caller, so report the error and carry on with the untranslated text.
        print(f"Translation error: {e}")
        return text, _empty_usage()  # Fall back to original

    # An empty reply is not a usable translation: keep the original query,
    # but still report the usage of the call that was made.
    return (translated or text), usage