Spaces:

jmisak
/

ProjectEcho

Sleeping

App Files Files Community

ProjectEcho / survey_translator.py

jmisak

Upload 23 files

196c707 verified 3 months ago

raw

history blame contribute delete

8.82 kB

	"""
	Survey Translation Module - Translate surveys to reach wider audiences
	"""
	import json
	from typing import Dict, List
	from llm_backend import LLMBackend


	class SurveyTranslator:
	    """
	    Translate survey dictionaries into other languages through an LLM backend.

	    Every translatable string (title, introduction, closing, question text,
	    answer options, help text) is sent to the backend as its own request,
	    together with a system prompt asking for meaning-preserving, culturally
	    appropriate wording.

	    The backend is only used through
	    ``generate(messages, max_tokens=..., temperature=...)``, which is expected
	    to return the generated text as a string.
	    """

	    # Common target languages for research, keyed by lowercase language code.
	    SUPPORTED_LANGUAGES: Dict[str, str] = {
	        "es": "Spanish",
	        "fr": "French",
	        "de": "German",
	        "pt": "Portuguese",
	        "it": "Italian",
	        "zh": "Chinese (Simplified)",
	        "ja": "Japanese",
	        "ko": "Korean",
	        "ar": "Arabic",
	        "hi": "Hindi",
	        "ru": "Russian",
	        "nl": "Dutch",
	        "sv": "Swedish",
	        "pl": "Polish",
	        "tr": "Turkish",
	        "vi": "Vietnamese",
	        "th": "Thai",
	        "id": "Indonesian",
	    }

	    def __init__(self, llm_backend: "LLMBackend"):
	        """
	        Store the backend used for all translation requests.

	        Args:
	            llm_backend: Object exposing ``generate(messages, max_tokens, temperature)``.
	        """
	        # The annotation is quoted so it is not evaluated when the class is defined.
	        self.llm = llm_backend

	    def translate_survey(self, survey_data: Dict, target_language: str) -> Dict:
	        """
	        Translate an entire survey to a target language.

	        The input dictionary is never modified: the result is a new dictionary
	        with its own ``questions`` list and its own ``metadata`` dictionary.
	        ``title``, ``introduction`` and ``closing`` are always present in the
	        result (as empty strings when the input lacks them).

	        Args:
	            survey_data: Survey dictionary with title, introduction, questions, closing
	            target_language: Target language code (e.g., 'es', 'fr') or full name

	        Returns:
	            Translated survey dictionary with same structure, plus
	            ``metadata["translated_to"]`` and ``metadata["original_language"]``.

	        Raises:
	            ValueError: If ``target_language`` is blank or not a string.
	            RuntimeError: If the backend fails while translating any field.
	        """
	        language_name = self._resolve_language(target_language)
	        if not language_name:
	            raise ValueError(f"Unsupported language: {target_language}")

	        # Shallow copy: every nested value that gets changed below is rebuilt,
	        # so nothing reachable from the caller's dictionary is mutated.
	        translated_survey = survey_data.copy()

	        main_fields = (
	            ("title", "survey title"),
	            ("introduction", "survey introduction"),
	            ("closing", "survey closing message"),
	        )
	        for field, context in main_fields:
	            translated_survey[field] = self._translate_text(
	                survey_data.get(field, ""),
	                language_name,
	                context=context,
	            )

	        # ``or []`` tolerates an explicit ``"questions": None``.
	        translated_survey["questions"] = [
	            self._translate_question(question, language_name)
	            for question in (survey_data.get("questions") or [])
	        ]

	        # Copy the metadata before writing to it. Writing into the shared nested
	        # dict would alter the caller's survey and make every result of
	        # translate_batch() report the last language processed.
	        source_metadata = survey_data.get("metadata")
	        metadata = dict(source_metadata) if isinstance(source_metadata, dict) else {}
	        metadata["translated_to"] = language_name
	        metadata["original_language"] = "English"
	        translated_survey["metadata"] = metadata

	        return translated_survey

	    def translate_batch(self, survey_data: Dict, target_languages: List[str]) -> Dict[str, Dict]:
	        """
	        Translate survey to multiple languages.

	        This is best-effort: a failure for one language does not stop the
	        others. The failing entry is recorded as ``{"error": "<message>"}``.

	        Args:
	            survey_data: Original survey data
	            target_languages: List of target language codes

	        Returns:
	            Dictionary mapping each requested language code to its translated
	            survey, or to an error dictionary when that translation failed.
	        """
	        translations: Dict[str, Dict] = {}

	        for lang_code in target_languages:
	            try:
	                translations[lang_code] = self.translate_survey(survey_data, lang_code)
	            except Exception as e:
	                # Deliberate catch-all so one bad language cannot abort the batch.
	                translations[lang_code] = {"error": str(e)}

	        return translations

	    def _resolve_language(self, language: str) -> str:
	        """
	        Resolve language code or name to full name.

	        Matching ignores case and surrounding whitespace. A value that is not
	        in SUPPORTED_LANGUAGES is returned title-cased rather than rejected,
	        so the LLM may still attempt it.

	        Returns:
	            The language name, or an empty string for blank or non-string input.
	        """
	        if not isinstance(language, str):
	            return ""

	        normalized = language.strip().lower()

	        # Check if it's a code
	        if normalized in self.SUPPORTED_LANGUAGES:
	            return self.SUPPORTED_LANGUAGES[normalized]

	        # Check if it's a full name
	        for name in self.SUPPORTED_LANGUAGES.values():
	            if name.lower() == normalized:
	                return name

	        # Return as-is if not found (LLM might still handle it)
	        return normalized.title()

	    def _translate_text(self, text: str, target_language: str, context: str = "") -> str:
	        """
	        Translate a piece of text with context awareness.

	        Args:
	            text: Text to translate
	            target_language: Target language name
	            context: Context for better translation (e.g., "survey question")

	        Returns:
	            Translated text with surrounding whitespace stripped. Blank strings
	            and non-string values (``None``, numeric answer options, ...) are
	            returned unchanged without calling the backend.

	        Raises:
	            RuntimeError: If the backend call fails; the original exception is
	                attached as ``__cause__``.
	        """
	        # Nothing to translate: skip the backend call. The isinstance check keeps
	        # numeric options such as a 1-5 scale from failing on ``.strip()``.
	        if not isinstance(text, str) or not text.strip():
	            return text

	        context_note = f" (this is a {context})" if context else ""

	        # The continuation lines of this literal are prompt text, not code;
	        # re-indenting them changes what is sent to the model.
	        prompt = f"""Translate the following text to {target_language}{context_note}.

	Maintain:
	- The original meaning and nuance
	- Professional and respectful tone
	- Cultural appropriateness
	- Any formatting or structure

	Original text:
	{text}

	Provide only the translation, no explanations or notes."""

	        messages = [
	            {"role": "system", "content": self._get_translation_system_prompt()},
	            {"role": "user", "content": prompt},
	        ]

	        try:
	            translation = self.llm.generate(messages, max_tokens=1000, temperature=0.3)
	            return translation.strip()
	        except Exception as e:
	            # Chain the cause so the backend traceback is not lost.
	            raise RuntimeError(f"Translation failed: {str(e)}") from e

	    def _translate_question(self, question: Dict, target_language: str) -> Dict:
	        """
	        Translate a single question with all its components.

	        ``question_text`` is always set on the result; ``options`` and
	        ``help_text`` are translated only when present and non-empty. All other
	        keys are carried over unchanged, and the input dictionary is not modified.

	        Args:
	            question: Question dictionary
	            target_language: Target language name

	        Returns:
	            Translated question dictionary
	        """
	        translated_q = question.copy()

	        # Translate question text
	        translated_q["question_text"] = self._translate_text(
	            question.get("question_text", ""),
	            target_language,
	            context="survey question",
	        )

	        # Translate options if present (a new list, so the original is untouched)
	        options = question.get("options")
	        if options:
	            translated_q["options"] = [
	                self._translate_text(option, target_language, context="answer option")
	                for option in options
	            ]

	        # Translate help text if present
	        help_text = question.get("help_text")
	        if help_text:
	            translated_q["help_text"] = self._translate_text(
	                help_text,
	                target_language,
	                context="help text",
	            )

	        return translated_q

	    def _get_translation_system_prompt(self) -> str:
	        """System prompt for translation tasks"""
	        # The continuation lines of this literal are prompt text, not code.
	        return """You are an expert translator specializing in survey research and qualitative studies.

	Your translations must:
	1. Preserve the exact meaning and intent of the original text
	2. Use culturally appropriate language for the target audience
	3. Maintain professional and neutral tone
	4. Adapt idioms and expressions appropriately
	5. Keep the same level of formality
	6. Preserve any special formatting or structure

	For survey questions, be especially careful to:
	- Avoid introducing bias
	- Keep questions clear and unambiguous
	- Maintain the same question type and structure
	- Use natural, conversational language when appropriate

	Provide accurate, natural-sounding translations that a native speaker would use."""

	    def back_translate(self, translated_text: str, original_language: str = "English") -> str:
	        """
	        Back-translate text to check translation quality.

	        Args:
	            translated_text: The translated text
	            original_language: Language to translate back to

	        Returns:
	            Back-translated text with surrounding whitespace stripped. Blank or
	            non-string input is returned unchanged without calling the backend,
	            matching the behaviour of forward translation.
	        """
	        if not isinstance(translated_text, str) or not translated_text.strip():
	            return translated_text

	        # The continuation lines of this literal are prompt text, not code.
	        prompt = f"""Translate the following text back to {original_language}.

	Text to translate:
	{translated_text}

	Provide only the translation, no explanations."""

	        messages = [
	            {"role": "system", "content": "You are an expert translator. Translate accurately."},
	            {"role": "user", "content": prompt},
	        ]

	        return self.llm.generate(messages, max_tokens=1000, temperature=0.3).strip()

	    def get_supported_languages(self) -> Dict[str, str]:
	        """Get dictionary of supported language codes and names"""
	        # Return a copy so callers cannot alter the class-level table.
	        return dict(self.SUPPORTED_LANGUAGES)