""" Survey Translation Module - Translate surveys to reach wider audiences """ import json from typing import Dict, List from llm_backend import LLMBackend class SurveyTranslator: """ Translates surveys into multiple languages while preserving meaning, context, and cultural appropriateness. """ # Common target languages for research SUPPORTED_LANGUAGES = { "es": "Spanish", "fr": "French", "de": "German", "pt": "Portuguese", "it": "Italian", "zh": "Chinese (Simplified)", "ja": "Japanese", "ko": "Korean", "ar": "Arabic", "hi": "Hindi", "ru": "Russian", "nl": "Dutch", "sv": "Swedish", "pl": "Polish", "tr": "Turkish", "vi": "Vietnamese", "th": "Thai", "id": "Indonesian" } def __init__(self, llm_backend: LLMBackend): self.llm = llm_backend def translate_survey(self, survey_data: Dict, target_language: str) -> Dict: """ Translate an entire survey to a target language. Args: survey_data: Survey dictionary with title, introduction, questions, closing target_language: Target language code (e.g., 'es', 'fr') or full name Returns: Translated survey dictionary with same structure """ # Resolve language name language_name = self._resolve_language(target_language) if not language_name: raise ValueError(f"Unsupported language: {target_language}") # Create a copy of the survey data translated_survey = survey_data.copy() # Translate main fields translated_survey["title"] = self._translate_text( survey_data.get("title", ""), language_name, context="survey title" ) translated_survey["introduction"] = self._translate_text( survey_data.get("introduction", ""), language_name, context="survey introduction" ) translated_survey["closing"] = self._translate_text( survey_data.get("closing", ""), language_name, context="survey closing message" ) # Translate questions translated_questions = [] for question in survey_data.get("questions", []): translated_q = self._translate_question(question, language_name) translated_questions.append(translated_q) translated_survey["questions"] = translated_questions # Add translation metadata if "metadata" not in translated_survey: translated_survey["metadata"] = {} translated_survey["metadata"]["translated_to"] = language_name translated_survey["metadata"]["original_language"] = "English" return translated_survey def translate_batch(self, survey_data: Dict, target_languages: List[str]) -> Dict[str, Dict]: """ Translate survey to multiple languages. Args: survey_data: Original survey data target_languages: List of target language codes Returns: Dictionary mapping language codes to translated surveys """ translations = {} for lang_code in target_languages: try: translated = self.translate_survey(survey_data, lang_code) translations[lang_code] = translated except Exception as e: translations[lang_code] = {"error": str(e)} return translations def _resolve_language(self, language: str) -> str: """Resolve language code or name to full name""" language = language.strip().lower() # Check if it's a code if language in self.SUPPORTED_LANGUAGES: return self.SUPPORTED_LANGUAGES[language] # Check if it's a full name for code, name in self.SUPPORTED_LANGUAGES.items(): if name.lower() == language: return name # Return as-is if not found (LLM might still handle it) return language.title() def _translate_text(self, text: str, target_language: str, context: str = "") -> str: """ Translate a piece of text with context awareness. Args: text: Text to translate target_language: Target language name context: Context for better translation (e.g., "survey question") Returns: Translated text """ if not text or not text.strip(): return text context_note = f" (this is a {context})" if context else "" prompt = f"""Translate the following text to {target_language}{context_note}. Maintain: - The original meaning and nuance - Professional and respectful tone - Cultural appropriateness - Any formatting or structure Original text: {text} Provide only the translation, no explanations or notes.""" messages = [ {"role": "system", "content": self._get_translation_system_prompt()}, {"role": "user", "content": prompt} ] try: translation = self.llm.generate(messages, max_tokens=1000, temperature=0.3) return translation.strip() except Exception as e: raise Exception(f"Translation failed: {str(e)}") def _translate_question(self, question: Dict, target_language: str) -> Dict: """ Translate a single question with all its components. Args: question: Question dictionary target_language: Target language name Returns: Translated question dictionary """ translated_q = question.copy() # Translate question text translated_q["question_text"] = self._translate_text( question.get("question_text", ""), target_language, context="survey question" ) # Translate options if present if "options" in question and question["options"]: translated_options = [] for option in question["options"]: translated_option = self._translate_text( option, target_language, context="answer option" ) translated_options.append(translated_option) translated_q["options"] = translated_options # Translate help text if present if "help_text" in question and question["help_text"]: translated_q["help_text"] = self._translate_text( question["help_text"], target_language, context="help text" ) return translated_q def _get_translation_system_prompt(self) -> str: """System prompt for translation tasks""" return """You are an expert translator specializing in survey research and qualitative studies. Your translations must: 1. Preserve the exact meaning and intent of the original text 2. Use culturally appropriate language for the target audience 3. Maintain professional and neutral tone 4. Adapt idioms and expressions appropriately 5. Keep the same level of formality 6. Preserve any special formatting or structure For survey questions, be especially careful to: - Avoid introducing bias - Keep questions clear and unambiguous - Maintain the same question type and structure - Use natural, conversational language when appropriate Provide accurate, natural-sounding translations that a native speaker would use.""" def back_translate(self, translated_text: str, original_language: str = "English") -> str: """ Back-translate text to check translation quality. Args: translated_text: The translated text original_language: Language to translate back to Returns: Back-translated text """ prompt = f"""Translate the following text back to {original_language}. Text to translate: {translated_text} Provide only the translation, no explanations.""" messages = [ {"role": "system", "content": "You are an expert translator. Translate accurately."}, {"role": "user", "content": prompt} ] return self.llm.generate(messages, max_tokens=1000, temperature=0.3).strip() def get_supported_languages(self) -> Dict[str, str]: """Get dictionary of supported language codes and names""" return self.SUPPORTED_LANGUAGES.copy()