Spaces:
Sleeping
Sleeping
| """ | |
| Survey Translation Module - Translate surveys to reach wider audiences | |
| """ | |
| import json | |
| from typing import Dict, List | |
| from llm_backend import LLMBackend | |
class SurveyTranslator:
    """
    Translates surveys into multiple languages while preserving
    meaning, context, and cultural appropriateness.

    All translation work is delegated to the LLM backend supplied at
    construction time; this class only builds prompts and reassembles the
    survey structure around the translated strings.
    """

    # Common target languages for research (ISO 639-1 code -> display name)
    SUPPORTED_LANGUAGES = {
        "es": "Spanish",
        "fr": "French",
        "de": "German",
        "pt": "Portuguese",
        "it": "Italian",
        "zh": "Chinese (Simplified)",
        "ja": "Japanese",
        "ko": "Korean",
        "ar": "Arabic",
        "hi": "Hindi",
        "ru": "Russian",
        "nl": "Dutch",
        "sv": "Swedish",
        "pl": "Polish",
        "tr": "Turkish",
        "vi": "Vietnamese",
        "th": "Thai",
        "id": "Indonesian",
    }

    def __init__(self, llm_backend: "LLMBackend"):
        """
        Store the backend used for every translation request.

        Args:
            llm_backend: Object exposing
                ``generate(messages, max_tokens=..., temperature=...)``
                and returning the generated text as a string.
        """
        self.llm = llm_backend

    def translate_survey(self, survey_data: Dict, target_language: str) -> Dict:
        """
        Translate an entire survey to a target language.

        The input dictionary is never modified: the result is a new top-level
        dictionary with new ``questions`` and ``metadata`` containers. Keys
        that are not translated are carried over unchanged.

        Args:
            survey_data: Survey dictionary with title, introduction, questions, closing
            target_language: Target language code (e.g., 'es', 'fr') or full name

        Returns:
            Translated survey dictionary with same structure, plus
            ``metadata["translated_to"]`` and ``metadata["original_language"]``.

        Raises:
            ValueError: If ``target_language`` is blank (the only input that
                resolves to an empty language name).
            Exception: If the LLM backend fails while translating any field.
        """
        # Resolve language name
        language_name = self._resolve_language(target_language)
        if not language_name:
            raise ValueError(f"Unsupported language: {target_language}")

        # Shallow copy is enough for the top level because every nested
        # container we touch below is replaced, not mutated in place.
        translated_survey = survey_data.copy()

        # Translate main fields (absent fields end up as empty strings)
        for field, context in (
            ("title", "survey title"),
            ("introduction", "survey introduction"),
            ("closing", "survey closing message"),
        ):
            translated_survey[field] = self._translate_text(
                survey_data.get(field, ""),
                language_name,
                context=context,
            )

        # Translate questions
        translated_survey["questions"] = [
            self._translate_question(question, language_name)
            for question in survey_data.get("questions", [])
        ]

        # Add translation metadata on a *copy* of the existing metadata so the
        # caller's survey (and sibling translations produced by
        # translate_batch) do not share and overwrite one dict.
        metadata = dict(survey_data.get("metadata") or {})
        metadata["translated_to"] = language_name
        metadata["original_language"] = "English"
        translated_survey["metadata"] = metadata

        return translated_survey

    def translate_batch(self, survey_data: Dict, target_languages: List[str]) -> Dict[str, Dict]:
        """
        Translate survey to multiple languages.

        Failures are isolated per language: a language whose translation
        raises is reported as ``{"error": "<message>"}`` instead of aborting
        the whole batch.

        Args:
            survey_data: Original survey data
            target_languages: List of target language codes

        Returns:
            Dictionary mapping language codes to translated surveys
        """
        translations = {}
        for lang_code in target_languages:
            try:
                translations[lang_code] = self.translate_survey(survey_data, lang_code)
            except Exception as e:
                # Deliberate best-effort: record the failure and keep going.
                translations[lang_code] = {"error": str(e)}
        return translations

    def _resolve_language(self, language: str) -> str:
        """
        Resolve language code or name to full name.

        Matching is case-insensitive and ignores surrounding whitespace.
        Unknown languages are not rejected; they are returned title-cased so
        the LLM can still attempt them. A blank input yields an empty string.
        """
        normalized = language.strip().lower()

        # Check if it's a code
        if normalized in self.SUPPORTED_LANGUAGES:
            return self.SUPPORTED_LANGUAGES[normalized]

        # Check if it's a full name
        for name in self.SUPPORTED_LANGUAGES.values():
            if name.lower() == normalized:
                return name

        # Return as-is if not found (LLM might still handle it)
        return normalized.title()

    def _translate_text(self, text: str, target_language: str, context: str = "") -> str:
        """
        Translate a piece of text with context awareness.

        Args:
            text: Text to translate
            target_language: Target language name
            context: Context for better translation (e.g., "survey question")

        Returns:
            Translated text with surrounding whitespace stripped. Blank text,
            ``None`` and non-string values (e.g. numeric answer options) are
            returned unchanged without calling the LLM.

        Raises:
            Exception: "Translation failed: ..." wrapping any backend error;
                the original error is available as ``__cause__``.
        """
        # Nothing translatable: skip the LLM call rather than crash on
        # ``.strip()`` or ask the model to translate an empty string.
        if not isinstance(text, str) or not text.strip():
            return text

        context_note = f" (this is a {context})" if context else ""
        prompt = (
            f"Translate the following text to {target_language}{context_note}.\n"
            "Maintain:\n"
            "- The original meaning and nuance\n"
            "- Professional and respectful tone\n"
            "- Cultural appropriateness\n"
            "- Any formatting or structure\n"
            "Original text:\n"
            f"{text}\n"
            "Provide only the translation, no explanations or notes."
        )
        messages = [
            {"role": "system", "content": self._get_translation_system_prompt()},
            {"role": "user", "content": prompt},
        ]

        try:
            translation = self.llm.generate(messages, max_tokens=1000, temperature=0.3)
            return translation.strip()
        except Exception as e:
            # Chain the cause so the backend traceback is not lost.
            raise Exception(f"Translation failed: {str(e)}") from e

    def _translate_question(self, question: Dict, target_language: str) -> Dict:
        """
        Translate a single question with all its components.

        The question text is always translated; ``options`` and ``help_text``
        are translated only when present and non-empty. The input dictionary
        is left untouched.

        Args:
            question: Question dictionary
            target_language: Target language name

        Returns:
            Translated question dictionary
        """
        translated_q = question.copy()

        # Translate question text
        translated_q["question_text"] = self._translate_text(
            question.get("question_text", ""),
            target_language,
            context="survey question",
        )

        # Translate options if present (a new list, so the original is not mutated)
        if question.get("options"):
            translated_q["options"] = [
                self._translate_text(option, target_language, context="answer option")
                for option in question["options"]
            ]

        # Translate help text if present
        if question.get("help_text"):
            translated_q["help_text"] = self._translate_text(
                question["help_text"],
                target_language,
                context="help text",
            )

        return translated_q

    def _get_translation_system_prompt(self) -> str:
        """System prompt for translation tasks"""
        return (
            "You are an expert translator specializing in survey research and qualitative studies.\n"
            "Your translations must:\n"
            "1. Preserve the exact meaning and intent of the original text\n"
            "2. Use culturally appropriate language for the target audience\n"
            "3. Maintain professional and neutral tone\n"
            "4. Adapt idioms and expressions appropriately\n"
            "5. Keep the same level of formality\n"
            "6. Preserve any special formatting or structure\n"
            "For survey questions, be especially careful to:\n"
            "- Avoid introducing bias\n"
            "- Keep questions clear and unambiguous\n"
            "- Maintain the same question type and structure\n"
            "- Use natural, conversational language when appropriate\n"
            "Provide accurate, natural-sounding translations that a native speaker would use."
        )

    def back_translate(self, translated_text: str, original_language: str = "English") -> str:
        """
        Back-translate text to check translation quality.

        Args:
            translated_text: The translated text
            original_language: Language to translate back to

        Returns:
            Back-translated text with surrounding whitespace stripped. A blank
            string is returned unchanged without calling the LLM, matching
            the behaviour of forward translation.
        """
        # Mirror _translate_text: never ask the model to translate nothing.
        if isinstance(translated_text, str) and not translated_text.strip():
            return translated_text

        prompt = (
            f"Translate the following text back to {original_language}.\n"
            "Text to translate:\n"
            f"{translated_text}\n"
            "Provide only the translation, no explanations."
        )
        messages = [
            {"role": "system", "content": "You are an expert translator. Translate accurately."},
            {"role": "user", "content": prompt},
        ]
        return self.llm.generate(messages, max_tokens=1000, temperature=0.3).strip()

    def get_supported_languages(self) -> Dict[str, str]:
        """Get dictionary of supported language codes and names (a copy, safe to mutate)"""
        return self.SUPPORTED_LANGUAGES.copy()