# Hugging Face Spaces page banner captured with this file (the Space was
# showing a "Runtime error" status at scrape time):
#   Spaces:
#   Runtime error
#   Runtime error
| import json | |
| import os | |
| import time | |
| from pathlib import Path | |
| import gradio as gr | |
| from glossary_checker import GlossaryChecker | |
| from llm_post_editor import LLMTranslationEditor | |
| from trans_validator import TranslationValidator | |
# Configure paths
# Maps the display name shown in the Gradio dropdown to the glossary JSON
# file on disk (paths are relative to the app's working directory).
GLOSSARIES = {
    "84000 Glossary": "data/84000_glossary.json",
}
def _append_categories(markdown_output, term, analysis):
    """Append a collapsible "Show Categories & Definitions" section for one term.

    Mutates ``markdown_output`` in place. Shared by the "counted" and
    "not counted" term sections so both render identically.
    """
    markdown_output.append("\n<details>")
    markdown_output.append("<summary>Show Categories & Definitions</summary>\n")
    for cat_name in analysis['matching_categories']:
        # Category metadata may be missing for a matched name; default to {}.
        cat_data = term['categories'].get(cat_name, {})
        markdown_output.append(f"\n*{cat_name}*:\n")
        if 'translations' in cat_data:
            markdown_output.append(f"- Translations: {', '.join(cat_data['translations'])}\n")
        if 'definitions' in cat_data:
            markdown_output.append(f"- Definitions: {', '.join(cat_data['definitions'])}\n")
    markdown_output.append("</details>\n")


def load_validate_and_edit(file_obj, selected_glossary, api_key, progress=gr.Progress()):
    """Validate and post-edit aligned translations, returning a Markdown report.

    Args:
        file_obj: Raw bytes of the uploaded tab-separated alignment file
            (``gr.File(type="binary")`` hands the handler ``bytes``).
        selected_glossary: Key into ``GLOSSARIES`` choosing the glossary JSON.
        api_key: Anthropic API key; must start with ``"sk-"``.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            required for Gradio's progress-injection mechanism — do not change.

    Returns:
        A Markdown string: either the full validation/editing report or a
        human-readable error message.
    """
    if not api_key or not api_key.startswith("sk-"):
        return "Please provide a valid Anthropic API key (starts with 'sk-')"
    # Bug fix: bind temp_path BEFORE the try block. Previously it was assigned
    # inside `try`, so an exception raised earlier (e.g. a bad-encoding upload
    # failing in .decode) made the error handler itself crash with NameError.
    temp_path = "temp_aligned.txt"
    try:
        # Initialize progress tracking
        progress(0, desc="Starting processing...")

        # Read content from the uploaded file (bytes -> str)
        content = file_obj.decode('utf-8')
        progress(0.1, desc="File loaded")

        # Persist to a temp file because the validator reads from a path.
        with open(temp_path, "w", encoding='utf-8') as f:
            f.write(content)

        # Count non-empty lines purely for the progress message.
        total_lines = len([line for line in content.split('\n') if line.strip()])
        progress(0.15, desc=f"Found {total_lines} lines to process")

        # Initialize validation components
        progress(0.2, desc="Initializing validation...")
        glossary_path = GLOSSARIES[selected_glossary]
        checker = GlossaryChecker(glossary_path)
        validator = TranslationValidator(checker, api_key)

        # Run validation
        progress(0.3, desc="Running validation...")
        validation_results = validator.validate_translation(temp_path)
        progress(0.6, desc="Validation complete")

        # Bug fix: an empty/unparseable file previously died with
        # ZeroDivisionError in the average-score computation below.
        if not validation_results:
            return ("No translation lines could be validated. Please check "
                    "that the file is tab-separated with source and "
                    "translation columns.")

        # Run LLM post-editing on the validated lines.
        progress(0.7, desc="Starting post-editing...")
        editor = LLMTranslationEditor({"lines": validation_results}, api_key)
        edited_translations = editor.post_edit_translations()
        progress(0.9, desc="Post-editing complete")

        # Build the Markdown report.
        progress(0.95, desc="Generating report...")
        markdown_output = []

        # Summary section
        total_score = sum(r['score'] for r in validation_results) / len(validation_results)
        markdown_output.append(f"# Validation Results\n")
        markdown_output.append(f"**Overall Score**: {total_score:.2f}%\n")
        markdown_output.append("*(Score based on terms counted in scoring)*\n\n")
        markdown_output.append(f"**Total Lines**: {len(validation_results)}\n\n")

        # Processing statistics
        modified_lines = sum(1 for t in edited_translations if t['modified'])
        markdown_output.append("## Processing Statistics\n")
        markdown_output.append(f"- Lines Modified: {modified_lines}/{len(validation_results)}\n")
        markdown_output.append(f"- Processed at: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n")

        # Detailed per-line results (validation and editing lists are parallel).
        for validation, editing in zip(validation_results, edited_translations):
            markdown_output.append(f"## Line {validation['line_number']}\n")
            markdown_output.append(f"**Score**: {validation['score']:.2f}%\n")
            markdown_output.append(f"**Source**: {validation['source']}\n")
            markdown_output.append(f"**Current Translation**: {validation['target']}\n")

            # Show the edited translation only when the LLM actually changed it.
            if editing['modified']:
                markdown_output.append(f"\n**Post-Edited Translation**: {editing['edited']}\n")
                markdown_output.append(f"\n**Editing Notes**: {editing['reasoning']}\n")

            if validation['terms']:
                # Partition terms into scoring-relevant and informational.
                counted_terms = []
                other_terms = []
                for term in validation['terms']:
                    if term['analysis']['translation_assessment']['should_be_counted']:
                        counted_terms.append(term)
                    else:
                        other_terms.append(term)

                # Counted terms in a collapsible section.
                # NOTE(review): the 'π', 'β ' and 'β' glyphs below look like
                # mojibake of emoji from the deployed Space — confirm the
                # intended characters before changing them.
                if counted_terms:
                    markdown_output.append("\n<details>")
                    markdown_output.append("<summary>π Terms Counted in Scoring</summary>\n")
                    for term in counted_terms:
                        analysis = term['analysis']
                        assessment = analysis['translation_assessment']
                        markdown_output.append(f"\n#### `{term['source_term']}` {'β ' if assessment['translated_correctly'] else 'β'}\n")
                        markdown_output.append(f"- Found Translation: **{analysis['translated_as']}**\n")
                        markdown_output.append(f"- Expected Translation: **{analysis['glossary_translation']}**\n")
                        _append_categories(markdown_output, term, analysis)
                    markdown_output.append("</details>\n")

                # Not-counted terms in a separate collapsible section.
                if other_terms:
                    markdown_output.append("\n<details>")
                    markdown_output.append("<summary>Terms Not Counted in Scoring</summary>\n")
                    for term in other_terms:
                        analysis = term['analysis']
                        markdown_output.append(f"\n#### `{term['source_term']}`\n")
                        markdown_output.append(f"- Found Translation: {analysis['translated_as']}\n")
                        markdown_output.append(f"- Note: Term not counted due to usage context\n")
                        _append_categories(markdown_output, term, analysis)
                    markdown_output.append("</details>\n")

                markdown_output.append("\n---\n")
            else:
                markdown_output.append("\n*No glossary terms found in this line*\n\n---\n")

        progress(1.0, desc="Processing complete!")
        return "\n".join(markdown_output)
    except Exception as e:
        return f"Error during processing: {str(e)}\n\nPlease check your input file and API key and try again."
    finally:
        # Bug fix: single cleanup path (was duplicated in the success and
        # error branches) — the temp file is removed on every exit.
        if os.path.exists(temp_path):
            os.remove(temp_path)
# Create Gradio interface with examples.
# Layout: two columns — inputs (file, glossary, API key, submit) on the left,
# the Markdown report plus usage instructions on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Translation Validation & Editing Tool")

    with gr.Row():
        with gr.Column():
            # type="binary" means the click handler receives raw bytes,
            # which load_validate_and_edit decodes as UTF-8.
            file_input = gr.File(
                label="Upload aligned translations file (tab-separated)",
                type="binary"
            )
            # Dropdown keys index into the module-level GLOSSARIES mapping;
            # the first entry is preselected.
            glossary_input = gr.Dropdown(
                choices=list(GLOSSARIES.keys()),
                label="Select Glossary",
                value=list(GLOSSARIES.keys())[0]
            )
            # Masked input; the handler validates the "sk-" prefix.
            api_key_input = gr.Textbox(
                label="Anthropic API Key",
                placeholder="sk-...",
                type="password"
            )
            submit_btn = gr.Button("Process Translations", variant="primary")

            # Add examples. NOTE(review): the "sk-..." placeholder key passes
            # the prefix check but is not a real key, so running the example
            # verbatim will fail at the API call — confirm this is intended.
            gr.Examples(
                examples=[
                    [str(Path("data/example_translations.txt").resolve()), "84000 Glossary", "sk-..."],
                ],
                inputs=[file_input, glossary_input, api_key_input],
                label="Example Inputs"
            )

        with gr.Column():
            # Report target: the handler's returned Markdown string lands here.
            output = gr.Markdown()
            # NOTE(review): the π/β glyphs below appear to be mojibake of
            # emoji from the deployed Space — confirm before changing.
            gr.Markdown("""### Instructions
1. Upload a tab-separated file with Tibetan source and English translations
2. Select the glossary to use for validation
3. Enter your Anthropic API key
4. Click "Process Translations" and wait for results
The tool will:
- Validate translations against the glossary
- Calculate accuracy scores
- Suggest improvements using Claude
- Show detailed term analysis
Key:
- π Terms used for scoring
- β Correctly translated terms
- β Terms needing improvement""")

    # Wire the button to the processing function.
    submit_btn.click(
        fn=load_validate_and_edit,
        inputs=[file_input, glossary_input, api_key_input],
        outputs=output
    )

# Launch only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()