Spaces:
Runtime error
Runtime error
| import json | |
| from typing import Any, Dict, List | |
| from anthropic import Anthropic | |
class LLMTranslationEditor:
    """Post-edit translations with Claude, guided by glossary validation results.

    The editor walks the ``lines`` of a validation-results dict, asks Claude to
    revise each line containing a term that should be counted but was not
    translated correctly, and collects one result record per line.

    Attributes:
        results: The validation results passed to the constructor.
        client: Anthropic client used for the requests.
        model: Model identifier sent with each request.
        max_tokens: ``max_tokens`` value sent with each request.
    """

    DEFAULT_MODEL = "claude-3-sonnet-20240229"
    DEFAULT_MAX_TOKENS = 1000

    # Class-level fallbacks so the request settings exist even on instances
    # created without running __init__.
    model = DEFAULT_MODEL
    max_tokens = DEFAULT_MAX_TOKENS

    # Closing section appended to every prompt: what to return and in which shape.
    _RESPONSE_INSTRUCTIONS = "\n".join([
        "",
        "Please provide:",
        "1. An improved translation that:",
        "- Maintains the meaning of the Tibetan text",
        "- Maintains the style and tone of the current translation",
        "- Uses appropriate technical terms from the glossary",
        "- Preserves any correct parts of the current translation",
        "2. Your reasoning for the changes",
        "Respond in JSON format:",
        "{",
        '"edited_translation": "your improved translation",',
        '"reasoning": "explanation of changes and decisions",',
        '"modified": true/false',
        "}",
    ])

    def __init__(self, validation_results: dict, anthropic_api_key: str,
                 model: str = DEFAULT_MODEL,
                 max_tokens: int = DEFAULT_MAX_TOKENS):
        """Initialize with validation results and Anthropic API key.

        Args:
            validation_results (dict): Results from TranslationValidator
            anthropic_api_key (str): Anthropic API key for Claude access
            model (str): Model identifier to request; defaults to the model
                this class has always used.
            max_tokens (int): Maximum number of tokens requested per reply.
        """
        self.results = validation_results
        self.client = Anthropic(api_key=anthropic_api_key)
        self.model = model
        self.max_tokens = max_tokens

    @staticmethod
    def _unchanged(current_translation: str, reasoning: str) -> Dict[str, Any]:
        """Build the result returned when the translation is left as it is."""
        return {
            'edited_translation': current_translation,
            'modified': False,
            'reasoning': reasoning
        }

    @staticmethod
    def _as_text_list(values: Any) -> List[str]:
        """Coerce a glossary field into a list of strings that is safe to join."""
        if not values:
            return []
        if isinstance(values, (list, tuple, set)):
            return [str(value) for value in values]
        # A bare string (or any other scalar) is treated as a single entry;
        # joining a string directly would split it into characters.
        return [str(values)]

    @staticmethod
    def _terms_needing_attention(terms_info: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Collect prompt context for counted terms that were mistranslated."""
        terms_context = []
        for term in terms_info:
            analysis = term['analysis']
            assessment = analysis['translation_assessment']
            if not assessment['should_be_counted'] or assessment['translated_correctly']:
                continue
            matching = analysis['matching_categories']
            terms_context.append({
                'term': term['source_term'],
                'current': analysis['translated_as'],
                'suggested': analysis['glossary_translation'],
                # Only categories the analysis matched are shown to the model.
                'categories': {
                    cat_name: {
                        'translations': cat_data.get('translations', []),
                        'definitions': cat_data.get('definitions', [])
                    }
                    for cat_name, cat_data in term['categories'].items()
                    if cat_name in matching
                }
            })
        return terms_context

    def _build_prompt(self, source_text: str, current_translation: str,
                      terms_context: List[Dict[str, Any]]) -> str:
        """Assemble the editing prompt for one line and its flagged terms."""
        parts = [
            "You are an expert Tibetan translator. Review and improve this "
            "translation, focusing on accuracy and natural English:\n",
            f"Tibetan text: {source_text}\n",
            f"Current translation: {current_translation}\n",
            "The following terms need attention:",
        ]
        for term in terms_context:
            parts.append(f"\n\nTibetan term: {term['term']}")
            parts.append(f"\nCurrently translated as: {term['current']}")
            parts.append(f"\nGlossary suggestion: {term['suggested']}")
            for cat_name, cat_data in term['categories'].items():
                parts.append(f"\n{cat_name}:")
                definitions = self._as_text_list(cat_data['definitions'])
                if definitions:
                    parts.append(f"\n- Definitions: {', '.join(definitions)}")
                translations = self._as_text_list(cat_data['translations'])
                if translations:
                    parts.append(f"\n- Translations: {', '.join(translations)}")
        parts.append(self._RESPONSE_INSTRUCTIONS)
        return "".join(parts)

    @classmethod
    def _normalise_edit(cls, parsed: Any, current_translation: str) -> Dict[str, Any]:
        """Guarantee the three keys callers read, whatever the model returned."""
        if not isinstance(parsed, dict):
            return cls._unchanged(current_translation, 'Failed to parse LLM response')

        result = dict(parsed)  # keep any extra keys the model supplied

        edited = result.get('edited_translation')
        usable = isinstance(edited, str) and bool(edited.strip())
        if not usable:
            edited = current_translation
        result['edited_translation'] = edited

        flag = result.get('modified')
        if not usable:
            # Nothing usable came back, so the line cannot count as modified.
            result['modified'] = False
        elif not isinstance(flag, bool):
            # Missing or non-boolean flag (e.g. the string "false"): decide
            # from the text itself instead of trusting truthiness.
            result['modified'] = edited != current_translation

        reasoning = result.get('reasoning')
        if reasoning is None:
            result['reasoning'] = 'No reasoning provided'
        elif not isinstance(reasoning, str):
            result['reasoning'] = str(reasoning)
        return result

    def edit_translation(self, source_text: str, current_translation: str,
                         terms_info: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Use Claude to edit the translation considering validation results and context.

        Args:
            source_text (str): Original Tibetan text
            current_translation (str): Current English translation
            terms_info (list): Terms information from validation results

        Returns:
            Dict[str, Any]: Always contains ``edited_translation``, ``modified``
            and ``reasoning``. When no term needs editing, the request fails, or
            the reply cannot be parsed, the current translation is returned
            with ``modified`` set to ``False``.
        """
        import re

        terms_context = self._terms_needing_attention(terms_info)
        if not terms_context:
            return self._unchanged(current_translation, 'No terms requiring editing')

        prompt = self._build_prompt(source_text, current_translation, terms_context)

        try:
            message = self.client.messages.create(
                model=self.model,
                max_tokens=self.max_tokens,
                temperature=0,
                messages=[{"role": "user", "content": prompt}]
            )
            # The reply may wrap the JSON object in prose; take the outermost
            # brace-delimited span.
            json_match = re.search(r'\{.*\}', message.content[0].text, re.DOTALL)
            if not json_match:
                return self._unchanged(current_translation, 'Failed to parse LLM response')
            parsed = json.loads(json_match.group())
        except Exception as e:
            # Best effort by design: one failing line must not abort the batch.
            print(f"Error during LLM editing: {e}")
            return self._unchanged(current_translation, f'LLM editing failed: {str(e)}')

        return self._normalise_edit(parsed, current_translation)

    def post_edit_translations(self) -> List[Dict[str, Any]]:
        """Process all lines and post-edit translations using LLM.

        Returns:
            List[Dict[str, Any]]: One record per entry of ``self.results['lines']``
            with ``line_number``, ``source``, ``original``, ``edited``,
            ``modified`` and ``reasoning``.
        """
        edited_translations = []
        for line in self.results['lines']:
            source = line['source']
            target = line['target']
            terms = line.get('terms') or []

            if terms:
                edit_result = self.edit_translation(source, target, terms)
                edited = edit_result['edited_translation']
                modified = edit_result['modified']
                reasoning = edit_result['reasoning']
            else:
                # Lines without terms are passed through without an API call.
                edited, modified, reasoning = target, False, 'No terms to edit'

            edited_translations.append({
                'line_number': line['line_number'],
                'source': source,
                'original': target,
                'edited': edited,
                'modified': modified,
                'reasoning': reasoning
            })
        return edited_translations

    def save_edits(self, edited_translations: List[Dict[str, Any]],
                   output_path: str) -> None:
        """Save the post-edited translations with analysis to a file.

        The file is UTF-8 JSON with a ``summary`` (total and modified line
        counts) followed by the full ``translations`` list.

        Args:
            edited_translations (List[Dict[str, Any]]): Edited translations with analysis
            output_path (str): Path to save results
        """
        modified_lines = sum(1 for t in edited_translations if t['modified'])
        payload = {
            'summary': {
                'total_lines': len(edited_translations),
                'modified_lines': modified_lines
            },
            'translations': edited_translations
        }
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)
# Example usage: post-edit the lines in data/validation_results.json and write
# the outcome to llm_post_edited_translations.json.
if __name__ == "__main__":
    import os

    # Fail fast on a missing key: edit_translation swallows request errors per
    # line, so otherwise every line would be reported as an unmodified
    # translation instead of the run stopping.
    api_key = os.getenv('ANTHROPIC_API_KEY')
    if not api_key:
        raise SystemExit("ANTHROPIC_API_KEY is not set; cannot run LLM post-editing.")

    # Load validation results
    with open('data/validation_results.json', 'r', encoding='utf-8') as f:
        validation_results = json.load(f)

    # Create editor and process translations
    editor = LLMTranslationEditor(validation_results, api_key)
    edited_translations = editor.post_edit_translations()

    # Save results
    editor.save_edits(edited_translations, 'llm_post_edited_translations.json')

    # Print summary and every modified line
    modified = [trans for trans in edited_translations if trans['modified']]
    print("Post-editing completed:")
    print(f"Total lines: {len(edited_translations)}")
    print(f"Modified lines: {len(modified)}")
    print("\nExample modifications:")
    for trans in modified:
        print(f"\nLine {trans['line_number']}:")
        print(f"Source : {trans['source']}")
        print(f"Original: {trans['original']}")
        print(f"Edited : {trans['edited']}")
        print(f"Reasoning: {trans['reasoning']}")