| | """ |
| | Translation Agent |
| | Translates non-English comments to English using LLM |
| | """ |
| |
|
| | from typing import Dict, Any |
| | import json |
| | from langchain_openai import ChatOpenAI |
| | from langchain.schema import HumanMessage, SystemMessage |
| | from agents.base_agent import BaseAgent |
| | import logging |
| |
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
| |
|
| |
|
class TranslationAgent(BaseAgent):
    """
    Agent that translates text from a source language to English.

    Comments flagged as English are passed through unchanged; everything else
    is sent to a chat LLM that is asked to answer with a small JSON document
    (``translated_text``, ``translation_confidence``, ``notes``).
    """

    def __init__(self, config: Dict[str, Any], api_key: str):
        """
        Initialize the Translation Agent.

        Args:
            config: Configuration dictionary, forwarded to the base class
            api_key: OpenAI API key
        """
        super().__init__("TranslationAgent", config)
        self.api_key = api_key
        # NOTE(review): self.model / self.temperature are not assigned in this
        # class; presumably BaseAgent.__init__ sets them from config - confirm.
        self.llm = ChatOpenAI(
            model=self.model,
            temperature=self.temperature,
            api_key=self.api_key,
        )

    def validate_input(self, input_data: Dict[str, Any]) -> bool:
        """
        Validate that input contains required fields.

        Args:
            input_data: Input dictionary

        Returns:
            True if both "comment_text" and "is_english" are present,
            False otherwise
        """
        required_fields = ["comment_text", "is_english"]
        return all(field in input_data for field in required_fields)

    def translate_text(self, text: str, source_language: str) -> Dict[str, Any]:
        """
        Translate text from source language to English using LLM.

        This method never raises: any failure is reported through the
        returned dictionary, with the original text echoed back as
        ``translated_text``.

        Args:
            text: Text to translate
            source_language: Source language name

        Returns:
            Dictionary with "success", "translated_text",
            "translation_confidence" and "translation_notes"; on failure it
            additionally carries "error" with the exception message.
        """
        # Nothing to translate: skip the LLM round trip for blank comments.
        if isinstance(text, str) and not text.strip():
            return {
                "success": True,
                "translated_text": text,
                "translation_confidence": "N/A",
                "translation_notes": "Empty text, nothing to translate",
            }

        system_prompt = """You are a professional translator specializing in social media content related to music and education.
Translate the given text from the source language to English. The text is a comment on a musical content.
Preserve the tone, intent, and any emojis or special characters.
For informal social media language, maintain the casual tone in translation.

Return your response in JSON format with the following fields:
- translated_text: The English translation
- translation_confidence: Your confidence level (high, medium, low)
- notes: Any important notes about the translation (optional)
"""

        user_prompt = f"""Translate this {source_language} comment to English:

"{text}"

Return JSON only."""

        try:
            messages = [
                SystemMessage(content=system_prompt),
                HumanMessage(content=user_prompt),
            ]

            response = self.llm.invoke(messages)
            result = self._parse_llm_json_response(response.content)

            # dict.get(key, default) only falls back when the key is absent;
            # a JSON null (or any non-string) must also fall back to the
            # original text so callers always receive a string.
            translated = result.get("translated_text")
            if not isinstance(translated, str):
                translated = text

            return {
                "success": True,
                "translated_text": translated,
                "translation_confidence": result.get("translation_confidence", "medium"),
                "translation_notes": result.get("notes", ""),
            }

        except json.JSONDecodeError as e:
            self.log_processing(f"JSON decode error: {str(e)}", "warning")
            return self._failed_translation(text, "JSON parsing failed", e)

        except Exception as e:
            # Best-effort boundary: API/network errors must not abort the
            # caller, so they are logged and reported in the result instead.
            self.log_processing(f"Translation failed: {str(e)}", "error")
            return self._failed_translation(text, "Translation error", e)

    def process(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process comment and translate if needed.

        Args:
            input_data: Dictionary containing comment data with language info.
                Requires "comment_text" and "is_english"; "language" is
                optional and defaults to "Unknown".

        Returns:
            Dictionary with translation results ("success", "translated_text",
            "translation_performed", "translation_confidence",
            "translation_notes", plus "translation_error" when translation
            failed). Every input key not already present in the result is
            copied over. Unexpected exceptions are delegated to
            ``self.handle_error``.
        """
        try:
            if not self.validate_input(input_data):
                return {
                    "success": False,
                    "error": "Invalid input: missing required fields",
                    "translated_text": input_data.get("comment_text", ""),
                    "translation_performed": False,
                }

            comment_text = input_data["comment_text"]
            is_english = input_data["is_english"]
            source_language = input_data.get("language", "Unknown")

            if is_english:
                result = {
                    "success": True,
                    "translated_text": comment_text,
                    "translation_performed": False,
                    "translation_confidence": "N/A",
                    "translation_notes": "Original text is English",
                }
                self.log_processing("Text is already English, skipping translation", "debug")
            else:
                self.log_processing(
                    f"Translating from {source_language} to English",
                    "debug",
                )

                translation_result = self.translate_text(comment_text, source_language)

                result = {
                    "success": translation_result.get("success", True),
                    "translated_text": translation_result.get("translated_text", comment_text),
                    "translation_performed": True,
                    "translation_confidence": translation_result.get("translation_confidence", "medium"),
                    "translation_notes": translation_result.get("translation_notes", ""),
                }

                if "error" in translation_result:
                    result["translation_error"] = translation_result["error"]

            # Carry the input fields through, never overwriting a key that
            # this agent has already set.
            for key, value in input_data.items():
                result.setdefault(key, value)

            return result

        except Exception as e:
            return self.handle_error(e, "translation")

    def _parse_llm_json_response(self, response_content: str) -> Dict[str, Any]:
        """
        Parse LLM response that may contain JSON wrapped in markdown code blocks.

        Candidates are tried in order and the first one that decodes to a JSON
        object wins:
          1. the stripped response as-is,
          2. the contents of a ``` / ```json fence (tag matched
             case-insensitively, fence may be preceded or followed by prose),
          3. the span from the first "{" to the last "}".

        Args:
            response_content: Raw response content from LLM

        Returns:
            Parsed JSON dictionary

        Raises:
            json.JSONDecodeError: If no candidate can be parsed, or the parsed
                value is not a JSON object
        """
        content = response_content.strip()

        # The raw text goes first so a valid reply that merely contains
        # backticks or braces inside a string value is never mangled.
        candidates = [content]

        opening = content.find("```")
        if opening != -1:
            fenced = content[opening + 3:]
            if fenced[:4].lower() == "json":
                fenced = fenced[4:]
            # rfind: the closing fence is the last one, so backticks inside
            # the JSON payload do not truncate it.
            closing = fenced.rfind("```")
            if closing != -1:
                fenced = fenced[:closing]
            candidates.append(fenced.strip())

        first_brace = content.find("{")
        last_brace = content.rfind("}")
        if first_brace != -1 and last_brace > first_brace:
            candidates.append(content[first_brace:last_brace + 1])

        last_error = None
        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError as exc:
                last_error = exc
                continue
            if isinstance(parsed, dict):
                return parsed
            # Valid JSON but not an object (list, string, number, ...):
            # callers rely on .get(), so report it as a decode failure.
            last_error = json.JSONDecodeError("Expected a JSON object", candidate, 0)

        # candidates is never empty, so last_error is always set here.
        raise last_error

    @staticmethod
    def _failed_translation(text: str, notes: str, error: Exception) -> Dict[str, Any]:
        """Build the failure result: original text echoed back, low confidence."""
        return {
            "success": False,
            "translated_text": text,
            "translation_confidence": "low",
            "translation_notes": notes,
            "error": str(error),
        }