Spaces:
Build error
Build error
| import streamlit as st | |
| import PyPDF2 | |
| import openai | |
| from io import BytesIO | |
| import io | |
| from reportlab.pdfgen import canvas | |
| from reportlab.lib.pagesizes import letter, A4 | |
| from reportlab.pdfbase import pdfmetrics | |
| from reportlab.pdfbase.ttfonts import TTFont | |
| from reportlab.lib.utils import simpleSplit | |
| from reportlab.lib.colors import black | |
| import arabic_reshaper | |
| from bidi.algorithm import get_display | |
| import os | |
def register_fonts():
    """Register the Noto TrueType fonts used to render translated PDFs.

    Each font is registered on its own so that one missing or unreadable
    file does not prevent the remaining fonts from being registered.
    When any registration fails, a single Streamlit warning lists the
    affected files; nothing is raised to the caller.
    """
    font_files = (
        # Noto Nastaliq Urdu for better Urdu rendering
        ('NotoNastaliqUrdu', 'NotoNastaliqUrdu-Regular.ttf'),
        # Noto Naskh Arabic for Arabic
        ('NotoNaskhArabic', 'NotoNaskhArabic-Regular.ttf'),
        # Noto Sans for other languages
        ('NotoSans', 'NotoSans-Regular.ttf'),
    )

    failed = []
    for font_name, font_path in font_files:
        try:
            pdfmetrics.registerFont(TTFont(font_name, font_path))
        except Exception:
            # Best effort: remember the failure and keep trying the others.
            failed.append(font_path)

    if failed:
        st.warning(
            f"Font files not found ({', '.join(failed)}). "
            "Default fonts will be used."
        )
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of an uploaded PDF.

    Args:
        pdf_file: File-like object (or path) accepted by ``PyPDF2.PdfReader``.

    Returns:
        The text of all pages that yielded any text, joined with a newline
        so the last word of one page is not fused with the first word of
        the next. Returns an empty string when no page yields text.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    page_texts = []
    for page in pdf_reader.pages:
        # Defensive: treat a falsy result (empty string, or None if the
        # library ever returns it) as "no text on this page".
        page_text = page.extract_text() or ""
        if page_text:
            page_texts.append(page_text)
    return "\n".join(page_texts)
def _select_font(c, target_language):
    """Set the canvas font for ``target_language`` and return ``(name, size)``.

    Falls back to Helvetica 12 when the preferred font cannot be set
    (presumably because it was never registered).
    """
    preferred_fonts = {
        "Urdu": ('NotoNastaliqUrdu', 16),  # Larger size for Nastaliq
        "Arabic": ('NotoNaskhArabic', 14),
    }
    font_name, font_size = preferred_fonts.get(target_language, ('NotoSans', 12))
    try:
        c.setFont(font_name, font_size)
    except Exception:
        font_name, font_size = 'Helvetica', 12
        c.setFont(font_name, font_size)
    return font_name, font_size


def _layout_lines(text, max_width, measure):
    """Greedily word-wrap ``text`` (in reading order) to ``max_width``.

    ``measure`` maps a string to its rendered width. Blank input lines are
    kept as empty strings so paragraph spacing survives. A word wider than
    ``max_width`` is placed alone on its own line.
    """
    space_width = measure(' ')
    lines = []
    for paragraph in text.split('\n'):
        words = paragraph.split()
        if not words:
            lines.append('')
            continue
        current, used = [], 0.0
        for word in words:
            word_width = measure(word)
            # Only break when the line already holds a word; otherwise an
            # over-wide word would emit a spurious empty line first.
            if current and used + word_width > max_width:
                lines.append(' '.join(current))
                current, used = [], 0.0
            current.append(word)
            used += word_width + space_width
        lines.append(' '.join(current))
    return lines


def create_pdf(text, target_language):
    """Create a PDF file from text with proper language support.

    Args:
        text: The text to render; ``'\\n'`` separates paragraphs.
        target_language: Language name. ``"Urdu"`` and ``"Arabic"`` are
            shaped, reordered for right-to-left display and right-aligned;
            every other value is drawn left-to-right from the left margin.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the A4 PDF.
    """
    buffer = BytesIO()
    c = canvas.Canvas(buffer, pagesize=A4)
    width, height = A4
    margin = 50
    # Set initial Y position from top
    y = height - margin
    max_width = width - (2 * margin)

    is_rtl = target_language in ('Arabic', 'Urdu')
    font_name, font_size = _select_font(c, target_language)

    def measure(s):
        return c.stringWidth(s, font_name, font_size)

    if is_rtl:
        try:
            # Shape letters first (keeps reading order), wrap in reading
            # order, and only then reorder each finished line for display.
            # Reordering before wrapping splits embedded left-to-right runs
            # across lines in the wrong order.
            shaped = arabic_reshaper.reshape(text)
            lines = [
                get_display(line, base_dir='R') if line else line
                for line in _layout_lines(shaped, max_width, measure)
            ]
        except Exception:
            # Best effort: still produce a PDF from the unshaped text.
            lines = _layout_lines(text, max_width, measure)
    else:
        lines = _layout_lines(text, max_width, measure)

    # Draw text with proper spacing
    line_height = font_size * 1.5
    for line in lines:
        if line:
            if y < margin:
                c.showPage()
                y = height - margin
                # Reset font for new page
                c.setFont(font_name, font_size)
            if is_rtl:
                x = width - margin - measure(line)
            else:
                x = margin
            c.drawString(x, y, line)
        # Blank lines only advance the cursor; they never force a new page.
        y -= line_height

    c.save()
    buffer.seek(0)
    return buffer
def translate_text(text, target_language, api_key):
    """Translate text using the OpenAI chat completions API.

    Args:
        text: Source text to translate.
        target_language: Name of the language to translate into; it is
            interpolated into the system prompt.
        api_key: OpenAI API key used to build the client.

    Returns:
        The translated text. Returns ``""`` when ``text`` is empty or only
        whitespace (no API call is made) or when the response carries no
        content. On any API failure returns a string starting with
        ``"Translation error: "`` instead of raising.
    """
    # Nothing to translate (e.g. a PDF without a text layer): skip the call.
    if not text or not text.strip():
        return ""

    # Enhanced prompt for better translation
    system_prompt = (
        f"You are a professional translator specializing in {target_language}.\n"
        f"Translate the following text to {target_language}, ensuring:\n"
        "1. Technical terms are accurately translated\n"
        "2. Maintain formal language and proper grammar\n"
        "3. Preserve formatting and structure\n"
        "4. Keep proper nouns and technical terms like 'AI', 'LLMs', 'Python' in English where appropriate\n"
        "5. Use culturally appropriate expressions\n"
    )

    try:
        client = openai.OpenAI(api_key=api_key)
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": text},
            ],
            temperature=0.3,
        )
        # Normalise a missing message body to an empty string so callers
        # always receive a str.
        return response.choices[0].message.content or ""
    except Exception as e:
        return f"Translation error: {e}"
# Set page config
st.set_page_config(page_title="PDF Translator", layout="wide")

# Try to register fonts at startup
register_fonts()

# Main app interface
st.title("PDF Document Translator")

# API key input (masked in the UI)
api_key = st.text_input("Enter your OpenAI API Key", type="password")

# Language selection: display name -> name passed to the translator
languages = {
    "English": "English",
    "Urdu": "Urdu",
    "Arabic": "Arabic",
    "Roman English": "Roman English",
    "Roman Urdu": "Roman Urdu",
    "Hindi": "Hindi",
    "Spanish": "Spanish",
    "French": "French",
    "Chinese": "Chinese",
    "Japanese": "Japanese",
}

# File uploader
uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")

# Language selector
target_language = st.selectbox(
    "Select target language",
    options=list(languages.keys()),
)

# Create two columns for original and translated text
col1, col2 = st.columns(2)

if uploaded_file is not None and api_key:
    # Extract text from PDF
    with st.spinner("Extracting text from PDF..."):
        text = extract_text_from_pdf(uploaded_file)

    # Show original text
    with col1:
        st.subheader("Original Text")
        st.text_area("", value=text, height=400, key="original_text")

    # Session state keeps the last translation and the language it was
    # produced for across reruns.
    if 'translated_text' not in st.session_state:
        st.session_state.translated_text = None
    if 'translated_language' not in st.session_state:
        st.session_state.translated_language = None

    # Translate button
    if st.button("Translate"):
        with st.spinner("Translating..."):
            result = translate_text(text, languages[target_language], api_key)
        if result and result.startswith("Translation error:"):
            # translate_text reports failures as a prefixed string; show it
            # as an error instead of offering it as a translated PDF.
            st.error(result)
        else:
            st.session_state.translated_text = result
            st.session_state.translated_language = target_language

    # Show the stored translation on every run, not only the run in which
    # the button was clicked.
    if st.session_state.translated_text:
        # Use the language the stored text was translated into, which may
        # differ from the current selection.
        shown_language = st.session_state.translated_language or target_language

        with col2:
            st.subheader(f"Translated Text ({shown_language})")
            # No widget key here: a key named "translated_text" would share
            # a session_state entry with the stored translation.
            st.text_area("", value=st.session_state.translated_text, height=400)

        # Create PDF button
        if st.download_button(
            label="Download Translated PDF",
            data=create_pdf(st.session_state.translated_text, shown_language),
            file_name=f"translated_{shown_language}.pdf",
            mime="application/pdf",
        ):
            st.success("PDF downloaded successfully!")
elif not api_key:
    st.warning("Please enter your OpenAI API key to proceed.")

# Add instructions and notes
st.markdown("""
### Instructions:
1. Enter your OpenAI API key
2. Upload your PDF file
3. Select your target language
4. Click 'Translate' to get your translation
5. Review the translation
6. Click 'Download Translated PDF' to save as PDF
Note: For best results with Arabic and Urdu translations, make sure you have a stable internet connection for consistent API responses.
""")