ProjectEcho / survey_translator.py
jmisak's picture
Upload 23 files
196c707 verified
"""
Survey Translation Module - Translate surveys to reach wider audiences
"""
import json
from typing import Dict, List
from llm_backend import LLMBackend
class SurveyTranslator:
    """
    Translates surveys into multiple languages while preserving
    meaning, context, and cultural appropriateness.

    Each translatable string (title, introduction, closing message, question
    text, answer options, help text) is sent to the LLM backend as its own
    request, so one survey produces several backend calls.
    """

    # Common target languages for research (code -> display name)
    SUPPORTED_LANGUAGES = {
        "es": "Spanish",
        "fr": "French",
        "de": "German",
        "pt": "Portuguese",
        "it": "Italian",
        "zh": "Chinese (Simplified)",
        "ja": "Japanese",
        "ko": "Korean",
        "ar": "Arabic",
        "hi": "Hindi",
        "ru": "Russian",
        "nl": "Dutch",
        "sv": "Swedish",
        "pl": "Polish",
        "tr": "Turkish",
        "vi": "Vietnamese",
        "th": "Thai",
        "id": "Indonesian"
    }

    def __init__(self, llm_backend: "LLMBackend"):
        """
        Store the backend used for every translation request.

        Args:
            llm_backend: Object exposing
                ``generate(messages, max_tokens=..., temperature=...)``
                whose result is a string.
        """
        # The annotation is quoted so it is not evaluated when the class
        # body is executed.
        self.llm = llm_backend

    def translate_survey(self, survey_data: Dict, target_language: str) -> Dict:
        """
        Translate an entire survey to a target language.

        The input dictionary is not modified: the top-level dict, each
        question dict, each options list and the metadata dict are copied
        before being changed.

        Args:
            survey_data: Survey dictionary with title, introduction,
                questions, closing (and optionally metadata)
            target_language: Target language code (e.g., 'es', 'fr') or
                full name

        Returns:
            Translated survey dictionary with the same structure, plus
            ``metadata["translated_to"]`` and
            ``metadata["original_language"]``

        Raises:
            ValueError: If the language resolves to an empty name
            Exception: If the backend fails while translating a field
        """
        # Resolve language name
        language_name = self._resolve_language(target_language)
        if not language_name:
            raise ValueError(f"Unsupported language: {target_language}")

        # Shallow copy: keys that are not translated are carried over as-is.
        translated_survey = survey_data.copy()

        # Translate main fields, in the same order as the backend calls
        # have always been issued.
        main_fields = (
            ("title", "survey title"),
            ("introduction", "survey introduction"),
            ("closing", "survey closing message"),
        )
        for field, context in main_fields:
            translated_survey[field] = self._translate_text(
                survey_data.get(field, ""),
                language_name,
                context=context
            )

        # Translate questions; a missing or None list is treated as empty.
        translated_survey["questions"] = [
            self._translate_question(question, language_name)
            for question in (survey_data.get("questions") or [])
        ]

        # Add translation metadata on a *copy* of the metadata dict.
        # Writing into the original would mutate the caller's survey and make
        # every result of translate_batch share (and overwrite) one dict.
        metadata = dict(survey_data.get("metadata") or {})
        metadata["translated_to"] = language_name
        metadata["original_language"] = "English"
        translated_survey["metadata"] = metadata

        return translated_survey

    def translate_batch(self, survey_data: Dict, target_languages: List[str]) -> Dict[str, Dict]:
        """
        Translate survey to multiple languages.

        A failure for one language does not abort the batch; that language's
        entry becomes ``{"error": <message>}`` instead.

        Args:
            survey_data: Original survey data
            target_languages: List of target language codes or names

        Returns:
            Dictionary mapping each requested language string to its
            translated survey (or to an error entry)
        """
        translations = {}
        for lang_code in target_languages:
            try:
                translations[lang_code] = self.translate_survey(survey_data, lang_code)
            except Exception as e:
                # Deliberate best-effort: record the failure and continue.
                translations[lang_code] = {"error": str(e)}
        return translations

    def _resolve_language(self, language: str) -> str:
        """
        Resolve language code or name to full name.

        Matching ignores case and surrounding whitespace. A value that is
        neither a known code nor a known name is returned title-cased so
        the LLM can still attempt it. Non-string or blank input yields an
        empty string.
        """
        if not isinstance(language, str):
            return ""
        normalized = language.strip().lower()

        # Check if it's a code
        if normalized in self.SUPPORTED_LANGUAGES:
            return self.SUPPORTED_LANGUAGES[normalized]

        # Check if it's a full name
        for name in self.SUPPORTED_LANGUAGES.values():
            if name.lower() == normalized:
                return name

        # Return as-is if not found (LLM might still handle it)
        return normalized.title()

    def _translate_text(self, text: str, target_language: str, context: str = "") -> str:
        """
        Translate a piece of text with context awareness.

        Args:
            text: Text to translate
            target_language: Target language name
            context: Context for better translation (e.g., "survey question")

        Returns:
            Translated text with surrounding whitespace stripped. Empty,
            whitespace-only and non-string values are returned unchanged
            without calling the backend.

        Raises:
            Exception: "Translation failed: ..." if the backend call fails;
                the original error is attached as ``__cause__``.
        """
        # Nothing to translate: blank text, None, or a non-string value such
        # as a numeric answer option.
        if not isinstance(text, str) or not text.strip():
            return text

        context_note = f" (this is a {context})" if context else ""
        prompt = (
            f"Translate the following text to {target_language}{context_note}.\n"
            "Maintain:\n"
            "- The original meaning and nuance\n"
            "- Professional and respectful tone\n"
            "- Cultural appropriateness\n"
            "- Any formatting or structure\n"
            "Original text:\n"
            f"{text}\n"
            "Provide only the translation, no explanations or notes."
        )
        messages = [
            {"role": "system", "content": self._get_translation_system_prompt()},
            {"role": "user", "content": prompt}
        ]

        try:
            translation = self.llm.generate(messages, max_tokens=1000, temperature=0.3)
            return translation.strip()
        except Exception as e:
            # Chain the cause so the backend traceback is not lost.
            raise Exception(f"Translation failed: {str(e)}") from e

    def _translate_question(self, question: Dict, target_language: str) -> Dict:
        """
        Translate a single question with all its components.

        Args:
            question: Question dictionary
            target_language: Target language name

        Returns:
            Translated copy of the question; ``question_text`` is always
            set, ``options`` and ``help_text`` are translated only when
            present and non-empty. Non-string options are kept as they are.
        """
        translated_q = question.copy()

        # Translate question text
        translated_q["question_text"] = self._translate_text(
            question.get("question_text", ""),
            target_language,
            context="survey question"
        )

        # Translate options if present (builds a new list, leaving the
        # original list untouched)
        options = question.get("options")
        if options:
            translated_q["options"] = [
                self._translate_text(option, target_language, context="answer option")
                for option in options
            ]

        # Translate help text if present
        help_text = question.get("help_text")
        if help_text:
            translated_q["help_text"] = self._translate_text(
                help_text,
                target_language,
                context="help text"
            )

        return translated_q

    def _get_translation_system_prompt(self) -> str:
        """System prompt for translation tasks"""
        return (
            "You are an expert translator specializing in survey research and qualitative studies.\n"
            "Your translations must:\n"
            "1. Preserve the exact meaning and intent of the original text\n"
            "2. Use culturally appropriate language for the target audience\n"
            "3. Maintain professional and neutral tone\n"
            "4. Adapt idioms and expressions appropriately\n"
            "5. Keep the same level of formality\n"
            "6. Preserve any special formatting or structure\n"
            "For survey questions, be especially careful to:\n"
            "- Avoid introducing bias\n"
            "- Keep questions clear and unambiguous\n"
            "- Maintain the same question type and structure\n"
            "- Use natural, conversational language when appropriate\n"
            "Provide accurate, natural-sounding translations that a native speaker would use."
        )

    def back_translate(self, translated_text: str, original_language: str = "English") -> str:
        """
        Back-translate text to check translation quality.

        Args:
            translated_text: The translated text
            original_language: Language to translate back to

        Returns:
            Back-translated text with surrounding whitespace stripped.
            Empty, whitespace-only and non-string input is returned
            unchanged without calling the backend.
        """
        # Same short-circuit as _translate_text: nothing to send.
        if not isinstance(translated_text, str) or not translated_text.strip():
            return translated_text

        prompt = (
            f"Translate the following text back to {original_language}.\n"
            "Text to translate:\n"
            f"{translated_text}\n"
            "Provide only the translation, no explanations."
        )
        messages = [
            {"role": "system", "content": "You are an expert translator. Translate accurately."},
            {"role": "user", "content": prompt}
        ]
        return self.llm.generate(messages, max_tokens=1000, temperature=0.3).strip()

    def get_supported_languages(self) -> Dict[str, str]:
        """Get dictionary of supported language codes and names"""
        # Return a copy so callers cannot alter the class-level table.
        return self.SUPPORTED_LANGUAGES.copy()