Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| BabelSlide v2.0 - Professional Document Translator | |
| Streamlit application for translating PDF, DOCX, and PPTX documents using AI | |
| """ | |
| import streamlit as st | |
| import tempfile | |
| from pathlib import Path | |
| import sys | |
| import os | |
| from translators.chatgpt_translator import ChatGPTTranslator | |
| from translators.deepseek_translator import DeepSeekTranslator | |
| from processors.pdf_processor import PDFProcessor | |
| from processors.docx_processor import DOCXProcessor | |
| from processors.pptx_processor import PPTXProcessor | |
| from utils.constants import LANGUAGES, API_PROVIDERS | |
| from utils.validator import FileValidator | |
| from utils.logger import setup_logger, ProcessLogger | |
| from core.exceptions import ( | |
| BabelSlideException, | |
| ValidationError, | |
| UnsupportedFileError, | |
| FileSizeError, | |
| APIKeyError, | |
| TranslationError, | |
| ProcessorError | |
| ) | |
class BabelSlideStreamlitApp:
    """Streamlit interface for the BabelSlide document translator.

    The class wires the sidebar configuration, the file uploader, input
    validation, the translation run and the result download widgets
    together. Results are kept in ``st.session_state`` so they survive the
    script reruns Streamlit performs on every widget interaction.
    """

    def __init__(self):
        """Create the loggers and seed the session-state keys the UI reads."""
        self.logger = setup_logger("BabelSlideUI")
        self.process_logger = ProcessLogger(self.logger)

        # Seed every key up front so later reads never need hasattr() checks.
        session_defaults = (
            ("processing", False),
            ("translation_result", None),
            ("review_result", None),
            ("summary_text", None),
        )
        for key, default in session_defaults:
            if key not in st.session_state:
                st.session_state[key] = default

    @staticmethod
    def _default_index(options, preferred):
        """Return the position of *preferred* in *options*, or 0 if absent."""
        try:
            return options.index(preferred)
        except ValueError:
            return 0

    @staticmethod
    def _write_temp_copy(file):
        """Write an uploaded file to a named temporary file and return its path.

        The temporary file keeps the upload's extension (empty when the name
        has none) and is created with ``delete=False``; the caller is
        responsible for removing it.
        """
        suffix = Path(file.name).suffix
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_file.write(file.getvalue())
        return Path(tmp_file.name)

    def setup_page_config(self):
        """Configure the Streamlit page and inject the custom CSS."""
        st.set_page_config(
            page_title="BabelSlide - Document Translator",
            page_icon="🌍",
            layout="wide",
            initial_sidebar_state="expanded"
        )

        # Custom CSS
        st.markdown("""
        <style>
        .main-header {
            text-align: center;
            padding: 2rem 0;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            border-radius: 12px;
            margin-bottom: 2rem;
        }
        .success-box {
            background: #d1fae5;
            border: 1px solid #10b981;
            border-radius: 8px;
            padding: 1rem;
            margin: 1rem 0;
        }
        .error-box {
            background: #fef2f2;
            border: 1px solid #ef4444;
            border-radius: 8px;
            padding: 1rem;
            margin: 1rem 0;
        }
        .info-box {
            background: #eff6ff;
            border: 1px solid #3b82f6;
            border-radius: 8px;
            padding: 1rem;
            margin: 1rem 0;
        }
        .stButton > button {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            border: none;
            border-radius: 8px;
            padding: 0.5rem 2rem;
            font-weight: 600;
        }
        </style>
        """, unsafe_allow_html=True)

    def render_header(self):
        """Render the application header banner."""
        st.markdown("""
        <div class="main-header">
        <h1>🌍 BabelSlide v2.0</h1>
        <p>Professional Document Translation using AI • PDF • DOCX • PPTX</p>
        </div>
        """, unsafe_allow_html=True)

    def render_sidebar(self):
        """Render the configuration sidebar.

        Returns:
            A tuple ``(api_provider, api_key, source_lang, target_lang)`` with
            the current widget values.
        """
        st.sidebar.markdown("## ⚙️ Configuration")

        # API Provider
        api_provider = st.sidebar.selectbox(
            "AI Provider",
            options=list(API_PROVIDERS),
            index=0,
            help="Choose your preferred translation AI"
        )

        # API Key
        api_key = st.sidebar.text_input(
            "API Key",
            type="password",
            placeholder="Enter your API key (sk-... for OpenAI)",
            help="Your API key is never stored permanently"
        )

        st.sidebar.markdown("---")

        # Languages: fall back to the first entry when a preferred default is
        # not present in LANGUAGES instead of raising ValueError.
        language_names = list(LANGUAGES)
        col1, col2 = st.sidebar.columns(2)
        with col1:
            source_lang = st.selectbox(
                "Source Language",
                options=language_names,
                index=self._default_index(language_names, "English"),
                help="Language of the original document"
            )
        with col2:
            target_lang = st.selectbox(
                "Target Language",
                options=language_names,
                index=self._default_index(language_names, "Polish"),
                help="Language to translate to"
            )

        st.sidebar.markdown("---")
        st.sidebar.markdown("### 📋 Supported Formats")
        st.sidebar.info("• PDF documents\n• DOCX (Word) files\n• PPTX (PowerPoint) presentations")
        st.sidebar.warning("Maximum file size: 50 MB")

        return api_provider, api_key, source_lang, target_lang

    def render_file_upload(self):
        """Render the upload widget and, once a file is chosen, its details.

        Returns:
            The uploaded file object, or ``None`` when nothing is uploaded.
        """
        st.markdown("## 📁 Document Upload")
        uploaded_file = st.file_uploader(
            "Choose a document to translate",
            type=['pdf', 'docx', 'pptx'],
            help="Upload PDF, DOCX, or PPTX files (max 50 MB)",
            accept_multiple_files=False
        )

        if uploaded_file:
            col1, col2, col3 = st.columns([2, 1, 1])
            with col1:
                st.info(f"📄 **File:** {uploaded_file.name}")
            with col2:
                file_size = len(uploaded_file.getvalue()) / (1024 * 1024)
                st.info(f"📊 **Size:** {file_size:.1f} MB")
            with col3:
                file_type = Path(uploaded_file.name).suffix.lstrip('.').upper()
                st.info(f"📋 **Type:** {file_type}")

        return uploaded_file

    def validate_inputs(self, file, api_provider, api_key, source_lang, target_lang):
        """Validate all inputs before processing.

        Args:
            file: Uploaded file object, or ``None``.
            api_provider: Provider name selected in the sidebar.
            api_key: Raw API key text; may be empty or ``None``.
            source_lang: Selected source language name.
            target_lang: Selected target language name.

        Returns:
            A list of human-readable error messages; empty when all inputs
            are acceptable.
        """
        errors = []
        stripped_key = api_key.strip() if api_key else ""

        if not file:
            errors.append("Please upload a document")
        if not stripped_key:
            errors.append("Please provide an API key")
        if source_lang == target_lang:
            errors.append("Source and target languages must be different")

        # Validate file if provided
        if file:
            tmp_file_path = None
            try:
                tmp_file_path = self._write_temp_copy(file)
                FileValidator.validate_file(tmp_file_path)
            except (ValidationError, UnsupportedFileError, FileSizeError) as e:
                errors.append(f"File validation error: {str(e)}")
            finally:
                # Remove the temporary copy even when validation raised.
                if tmp_file_path is not None:
                    tmp_file_path.unlink(missing_ok=True)

        # Validate API key format; a blank key was already reported above,
        # so it is not checked again.
        if stripped_key:
            try:
                FileValidator.validate_api_key(stripped_key, api_provider)
            except ValidationError as e:
                errors.append(f"API key error: {str(e)}")

        return errors

    def _create_translator(self, api_provider, api_key):
        """Return the translator instance for the selected provider."""
        if api_provider == "ChatGPT":
            return ChatGPTTranslator(api_key)
        if api_provider == "DeepSeek":
            return DeepSeekTranslator(api_key)
        raise ValueError(f"Unsupported provider: {api_provider}")

    def _create_processor(self, extension, translator):
        """Return the document processor for a lower-cased file extension."""
        if extension == '.pdf':
            return PDFProcessor(translator)
        if extension == '.docx':
            return DOCXProcessor(translator)
        if extension == '.pptx':
            return PPTXProcessor(translator)
        raise ValueError(f"Unsupported file format: {extension}")

    def process_document(self, file, api_provider, api_key, source_lang, target_lang):
        """Translate the uploaded document and generate a review.

        Args:
            file: Uploaded file object exposing ``name`` and ``getvalue()``.
            api_provider: ``"ChatGPT"`` or ``"DeepSeek"``.
            api_key: API key for the provider; surrounding whitespace is
                stripped before use.
            source_lang: Source language name.
            target_lang: Target language name.

        Returns:
            A tuple ``(output_path, review_text, summary_text)``.

        Raises:
            ValueError: If the provider or the file extension is unsupported.
            Exception: Any error raised during processing is logged and
                re-raised unchanged.
        """
        tmp_file_path = None
        try:
            tmp_file_path = self._write_temp_copy(file)
            translator = self._create_translator(api_provider, api_key.strip())
            processor = self._create_processor(tmp_file_path.suffix.lower(), translator)

            # Progress tracking
            progress_bar = st.progress(0)
            status_text = st.empty()

            def progress_callback(progress_val, desc):
                progress_bar.progress(progress_val)
                status_text.text(desc)

            # Process document
            status_text.text("Starting translation...")
            output_path, summary_text = processor.process_document(
                tmp_file_path,
                source_lang,
                target_lang,
                progress_callback
            )

            # Generate review
            status_text.text("Generating review...")
            review_text = self.generate_review(summary_text, source_lang, translator)

            progress_bar.progress(1.0)
            status_text.text("✅ Translation completed!")
            return output_path, review_text, summary_text
        except Exception as e:
            self.logger.error(f"Translation error: {str(e)}")
            raise
        finally:
            # Remove the temporary input copy on success and on failure alike.
            if tmp_file_path is not None:
                tmp_file_path.unlink(missing_ok=True)

    def generate_review(self, translated_text: str, source_lang: str, translator) -> str:
        """Ask the translator backend for a review of the translated text.

        Only the first 2000 characters of ``translated_text`` are sent.

        Args:
            translated_text: Text to review.
            source_lang: Language passed as the request's target language.
            translator: Object providing ``_make_translation_request`` and
                ``_clean_translation_output``.

        Returns:
            The cleaned review text, or a ``"Review generation failed: ..."``
            message when any step raises.
        """
        try:
            # NOTE(review): this goes through the translation request path, so
            # the provider may translate the instruction instead of acting on
            # it; a dedicated review prompt needs a translator API that is not
            # visible from this file.
            review = translator._make_translation_request(
                f"Review this translated text:\n\n{translated_text[:2000]}...",
                "English",
                source_lang
            )
            return translator._clean_translation_output(review)
        except Exception as e:
            return f"Review generation failed: {str(e)}"

    def render_results(self):
        """Render download widgets and the summary for the stored results."""
        result_path = st.session_state.translation_result
        if not result_path:
            return

        st.markdown("## 📥 Results")
        col1, col2 = st.columns(2)

        with col1:
            st.markdown("### 📄 Translated Document")
            result_file = Path(result_path)
            if result_file.is_file():
                st.download_button(
                    label="⬇️ Download Translated Document",
                    data=result_file.read_bytes(),
                    file_name=result_file.name,
                    mime="application/octet-stream"
                )
            else:
                # The stored path can outlive the file it points to; report
                # that instead of failing on every rerun.
                st.warning("The translated file is no longer available. Please run the translation again.")

        with col2:
            st.markdown("### 📝 Translation Review")
            if st.session_state.review_result:
                st.download_button(
                    label="⬇️ Download Review",
                    data=st.session_state.review_result,
                    file_name="translation_review.txt",
                    mime="text/plain"
                )

        # Summary
        summary_text = st.session_state.get("summary_text")
        if summary_text:
            st.markdown("### 📊 Translation Summary")
            with st.expander("View Summary", expanded=False):
                st.text_area(
                    "Summary",
                    value=summary_text[:1000] + "..." if len(summary_text) > 1000 else summary_text,
                    height=200,
                    disabled=True,
                    label_visibility="collapsed"
                )

    def _run_translation(self, uploaded_file, api_provider, api_key, source_lang, target_lang):
        """Run one translation and store its results in session state."""
        st.session_state.processing = True
        try:
            with st.spinner("Translating document..."):
                output_path, review_text, summary_text = self.process_document(
                    uploaded_file, api_provider, api_key, source_lang, target_lang
                )

            # Store results. render_results() runs later in this same script
            # run and reads these values, so no explicit rerun is needed; a
            # rerun here would discard the success message below.
            st.session_state.translation_result = output_path
            st.session_state.review_result = review_text
            st.session_state.summary_text = summary_text

            st.success(f"✅ **Translation completed successfully!**\n\n"
                       f"📄 **File:** {uploaded_file.name}\n"
                       f"🌍 **Translation:** {source_lang} → {target_lang}\n"
                       f"🤖 **Provider:** {api_provider}")
        except Exception as e:
            st.error(f"❌ **Translation failed:** {str(e)}")
        finally:
            st.session_state.processing = False

    def run(self):
        """Render the whole page and handle a translate request."""
        self.setup_page_config()
        self.render_header()

        # Sidebar configuration
        api_provider, api_key, source_lang, target_lang = self.render_sidebar()

        # Main content
        uploaded_file = self.render_file_upload()

        # Translation button, centred in the middle column
        st.markdown("---")
        _, center_col, _ = st.columns([1, 2, 1])
        with center_col:
            translate_button = st.button(
                "🚀 Translate Document",
                disabled=st.session_state.processing,
                use_container_width=True
            )

        # Process translation
        if translate_button:
            errors = self.validate_inputs(uploaded_file, api_provider, api_key, source_lang, target_lang)
            if errors:
                st.error("❌ **Please fix the following errors:**")
                for error in errors:
                    st.error(f"• {error}")
            else:
                self._run_translation(uploaded_file, api_provider, api_key, source_lang, target_lang)

        # Show results if available
        self.render_results()

        # Footer
        st.markdown("---")
        st.markdown(
            "<div style='text-align: center; color: #666;'>"
            "<strong>BabelSlide v2.0</strong> • Professional document translation • Built for global communication"
            "</div>",
            unsafe_allow_html=True
        )
# Script entry point: build the UI object and hand control to its run loop.
if __name__ == "__main__":
    BabelSlideStreamlitApp().run()