syed7 committed on
Commit
3ec6f9a
·
verified ·
1 Parent(s): a0c5855

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +236 -0
app.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import PyPDF2
3
+ import openai
4
+ from io import BytesIO
5
+ import io
6
+ from reportlab.pdfgen import canvas
7
+ from reportlab.lib.pagesizes import letter, A4
8
+ from reportlab.pdfbase import pdfmetrics
9
+ from reportlab.pdfbase.ttfonts import TTFont
10
+ from reportlab.lib.utils import simpleSplit
11
+ from reportlab.lib.colors import black
12
+ import arabic_reshaper
13
+ from bidi.algorithm import get_display
14
+ import os
15
+
16
+
17
# The OpenAI key is read from the environment (Hugging Face secrets);
# it is None when the variable is not set.
api_key = os.getenv('OPENAI_API_KEY')
19
+
20
def register_fonts():
    """Register the TrueType fonts used when rendering translated PDFs.

    Each font is registered independently so that one missing or unreadable
    font file does not prevent the remaining fonts from being registered.
    If any registration fails, a single Streamlit warning lists the failures.
    """
    # ReportLab font name -> TTF file.
    font_files = {
        'Jameel': 'fonts/Jameel Noori Nastaleeq.ttf',            # Urdu
        'NotoNaskhArabic': 'fonts/NotoNaskhArabic-Regular.ttf',  # Arabic
        'NotoSans': 'fonts/NotoSans-Regular.ttf',                # other languages
    }

    failures = []
    for font_name, font_path in font_files.items():
        try:
            pdfmetrics.registerFont(TTFont(font_name, font_path))
        except Exception as e:
            # Best effort: remember the failure and keep registering the rest.
            failures.append(f"{font_name}: {e}")

    if failures:
        st.warning(
            "Font files not found. Default fonts will be used. "
            f"Error: {'; '.join(failures)}"
        )
31
+
32
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf_file: The object handed to ``PyPDF2.PdfReader`` (the app passes
            the Streamlit upload).

    Returns:
        The page texts concatenated in page order, with no separator added.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # Join once instead of growing a string page by page; ``or ""`` guards
    # against an extractor that yields None for a page without text.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
39
+
40
def create_pdf(text, target_language):
    """Render text into an A4 PDF and return it as an in-memory buffer.

    Args:
        text: The text to write; newlines separate output lines.
        target_language: Language name. "Urdu" and "Arabic" select their own
            fonts and are right-aligned; every other value uses NotoSans and
            is left-aligned.

    Returns:
        A ``BytesIO`` rewound to offset 0 that contains the finished PDF.
    """
    buffer = BytesIO()
    c = canvas.Canvas(buffer, pagesize=A4)
    width, height = A4
    margin = 50
    y = height - margin  # first baseline, measured from the top of the page

    preferred_fonts = {
        "Urdu": ("Jameel", 16),
        "Arabic": ("NotoNaskhArabic", 14),
    }
    font_name, font_size = preferred_fonts.get(target_language, ("NotoSans", 12))

    try:
        c.setFont(font_name, font_size)
        if target_language == "Urdu":
            # Urdu text is drawn as-is: no reshaping or BIDI reordering.
            text = text.strip()
        elif target_language == "Arabic":
            # Join letter forms, then reorder into visual (left-to-right) order.
            text = get_display(arabic_reshaper.reshape(text))
    except Exception:
        # The preferred font is not registered (or shaping failed): fall back
        # to a built-in font and remember it, so that page breaks re-select
        # the same font instead of retrying the one that just failed.
        font_name, font_size = "Helvetica", 12
        c.setFont(font_name, font_size)
        if target_language == "Urdu":
            st.warning("Urdu font not loaded properly. Using default font.")

    # Split on every path, including the fallback one.
    lines = text.split("\n")

    is_rtl = target_language in ("Arabic", "Urdu")
    # Urdu gets extra leading.
    line_height = font_size * 2.5 if target_language == "Urdu" else font_size * 1.5
    max_width = width - 2 * margin

    for line in lines:
        # Wrap lines that would run past the right margin. Arabic text is
        # already in visual order here, so splitting it would scramble the
        # reading order; those lines are left unwrapped.
        segments = [line]
        if (target_language != "Arabic"
                and c.stringWidth(line, font_name, font_size) > max_width):
            segments = simpleSplit(line, font_name, font_size, max_width) or [line]

        for segment in segments:
            if y < margin:  # near the bottom of the page
                c.showPage()
                y = height - margin
                # Re-select the font that is actually in use on the new page.
                c.setFont(font_name, font_size)

            if is_rtl:
                c.drawRightString(width - margin, y, segment)
            else:
                c.drawString(margin, y, segment)

            y -= line_height

    c.save()
    buffer.seek(0)
    return buffer
113
+
114
def translate_text(text, target_language, api_key):
    """Translate text into target_language using the OpenAI chat API.

    Args:
        text: Source text to translate.
        target_language: Name of the language to translate into.
        api_key: OpenAI API key passed to the client.

    Returns:
        The translated text. An empty string is returned when ``text`` is
        empty or only whitespace (no request is sent) or when the model
        returns no content. If the request raises, a string starting with
        "Translation error: " is returned instead of raising.
    """
    # Nothing to translate (e.g. a PDF with no extractable text): skip the
    # request rather than sending an empty prompt to the model.
    if not text or not text.strip():
        return ""

    try:
        client = openai.OpenAI(api_key=api_key)

        # Enhanced prompt for better translation
        system_prompt = "\n".join([
            f"You are a professional translator specializing in {target_language}.",
            f"Translate the following text to {target_language}, ensuring:",
            "1. Technical terms are accurately translated",
            "2. Maintain formal language and proper grammar",
            "3. Preserve formatting and structure",
            "4. Keep proper nouns and technical terms like 'AI', 'LLMs', 'Python' in English where appropriate",
            "5. Use culturally appropriate expressions",
            "6. For Urdu/Arabic, ensure proper character connections and diacritics",
            "7. Maintain professional and accurate technical translations",
        ]) + "\n"

        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": text},
            ],
            temperature=0.3,
        )
        # The message content may be None; normalise it to a string.
        return response.choices[0].message.content or ""
    except Exception as e:
        # Callers expect a string, so failures are reported in-band with a
        # fixed prefix instead of being raised.
        return f"Translation error: {str(e)}"
142
+
143
# Set page config
st.set_page_config(page_title="PDF Translator", layout="wide")

# Try to register fonts at startup
register_fonts()

# Main app interface
st.title("PDF Document Translator")

# Add custom CSS for better text display
st.markdown("""
<style>
    .stTextArea textarea {
        font-size: 16px !important;
    }
</style>
""", unsafe_allow_html=True)

# Language selection: label shown in the selector -> name sent to the translator
languages = {
    "English": "English",
    "Urdu": "Urdu",
    "Arabic": "Arabic",
    "Roman English": "Roman English",
    "Roman Urdu": "Roman Urdu",
    "Hindi": "Hindi",
    "Spanish": "Spanish",
    "French": "French"
}

# File uploader
uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")

# Language selector
target_language = st.selectbox(
    "Select target language",
    options=list(languages.keys())
)

# Create two columns for original and translated text
col1, col2 = st.columns(2)

if uploaded_file is not None and api_key:
    # Extract text from PDF; an unreadable file is reported, not fatal.
    text = None
    with st.spinner("Extracting text from PDF..."):
        try:
            text = extract_text_from_pdf(uploaded_file)
        except Exception as e:
            st.error(f"Could not read the PDF: {e}")

    if text is not None:
        # Show original text
        with col1:
            st.subheader("Original Text")
            st.text_area("", value=text, height=400, key="original_text")

        # Initialize session state for the translation and what it belongs to
        if 'translated_text' not in st.session_state:
            st.session_state.translated_text = None
        if 'translation_source' not in st.session_state:
            st.session_state.translation_source = None

        # A stored translation is only valid for the file and language it was
        # made for; drop it when either changes so that a stale translation
        # is never exported under another language's name and font.
        current_source = (uploaded_file.name, target_language)
        if st.session_state.translation_source != current_source:
            st.session_state.translated_text = None

        # Translate button
        if st.button("Translate"):
            with st.spinner("Translating..."):
                result = translate_text(text, languages[target_language], api_key)

            if not result:
                st.warning("No text could be translated from this PDF.")
                st.session_state.translated_text = None
            elif result.startswith("Translation error:"):
                # translate_text reports failures in-band; show them as an
                # error instead of treating the message as a translation.
                st.error(result)
                st.session_state.translated_text = None
            else:
                st.session_state.translated_text = result
                st.session_state.translation_source = current_source

        # Show the translation on every rerun, not only right after the click
        if st.session_state.translated_text:
            with col2:
                st.subheader(f"Translated Text ({target_language})")
                # The widget key must differ from the "translated_text"
                # session entry, which this script manages itself.
                st.text_area(
                    "",
                    value=st.session_state.translated_text,
                    height=400,
                    key="translated_text_display",
                )

            # Create PDF button
            if st.download_button(
                label="Download Translated PDF",
                data=create_pdf(st.session_state.translated_text, target_language),
                file_name=f"translated_{target_language}.pdf",
                mime="application/pdf"
            ):
                st.success("PDF downloaded successfully!")

elif not api_key:
    # The key comes from the environment; there is no input field for it.
    st.warning("OpenAI API key not found. Set the OPENAI_API_KEY secret to proceed.")

# Add instructions and notes
st.markdown("""
### Instructions:
1. Upload your PDF file
2. Select your target language
3. Click 'Translate' to get your translation
4. Review the translation
5. Click 'Download Translated PDF' to save as PDF

""")