ABDALLALSWAITI committed on
Commit
a7b614f
·
verified ·
1 Parent(s): 7a6b09b

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +246 -99
src/streamlit_app.py CHANGED
@@ -4,6 +4,8 @@ import os
4
  import tempfile
5
  import shutil
6
  from pathlib import Path
 
 
7
 
8
  st.set_page_config(
9
  page_title="HTML to PDF Converter",
@@ -11,9 +13,74 @@ st.set_page_config(
11
  layout="wide"
12
  )
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def convert_html_to_pdf(html_content, aspect_ratio):
15
  """
16
- Convert HTML content to PDF using Puppeteer
17
 
18
  Args:
19
  html_content: String containing HTML content
@@ -27,12 +94,38 @@ def convert_html_to_pdf(html_content, aspect_ratio):
27
  # Create temporary directory for processing
28
  temp_dir = tempfile.mkdtemp()
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  # Save HTML content to temporary file
31
  html_file = os.path.join(temp_dir, "input.html")
32
  with open(html_file, 'w', encoding='utf-8') as f:
33
  f.write(html_content)
34
 
35
- # Get the path to puppeteer_pdf.js (it's in parent directory)
36
  script_dir = os.path.dirname(os.path.abspath(__file__))
37
  puppeteer_script = os.path.join(os.path.dirname(script_dir), 'puppeteer_pdf.js')
38
 
@@ -75,8 +168,8 @@ def convert_html_to_pdf(html_content, aspect_ratio):
75
  # Page header
76
  st.title("πŸ“„ HTML to PDF Converter")
77
  st.markdown("""
78
- Convert HTML files or HTML code to PDF using Puppeteer.
79
- Supports multiple aspect ratios: **16:9** (landscape), **1:1** (square), and **9:16** (portrait).
80
  """)
81
 
82
  # Create tabs
@@ -84,77 +177,95 @@ tab1, tab2 = st.tabs(["πŸ“€ Upload HTML File", "πŸ“ Paste HTML Code"])
84
 
85
  # Tab 1: Upload HTML File
86
  with tab1:
87
- col1, col2 = st.columns([1, 1])
 
 
 
 
 
 
88
 
89
- with col1:
90
- uploaded_file = st.file_uploader(
91
- "Choose an HTML file",
92
- type=['html', 'htm'],
93
- key="file_uploader",
94
- help="Upload an HTML file (max 200MB)",
95
- accept_multiple_files=False
96
- )
97
 
98
- # Display file info if uploaded
99
- if uploaded_file is not None:
100
- st.info(f"πŸ“ File: {uploaded_file.name} ({uploaded_file.size} bytes)")
 
 
 
 
101
 
102
- aspect_ratio_file = st.radio(
103
- "Aspect Ratio",
104
- options=["16:9", "1:1", "9:16"],
105
- index=0,
106
- key="aspect_file",
107
- help="Select the page orientation and dimensions"
108
- )
109
 
110
- convert_file_btn = st.button("πŸ”„ Convert to PDF", key="convert_file", type="primary", use_container_width=True, disabled=(uploaded_file is None))
111
-
112
- with col2:
113
- if convert_file_btn and uploaded_file is not None:
114
- try:
115
- with st.spinner("Converting HTML to PDF..."):
116
- # Reset file pointer to beginning
117
- uploaded_file.seek(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
- # Read uploaded file with proper encoding handling
120
- try:
121
- html_content = uploaded_file.getvalue().decode('utf-8')
122
- except UnicodeDecodeError:
123
- # Try with latin-1 encoding if utf-8 fails
124
- uploaded_file.seek(0)
125
- html_content = uploaded_file.getvalue().decode('latin-1')
126
 
127
- if not html_content or len(html_content.strip()) == 0:
128
- st.error("The uploaded file is empty.")
129
- else:
130
- # Convert to PDF
131
- pdf_bytes, error = convert_html_to_pdf(html_content, aspect_ratio_file)
132
 
133
- if error:
134
- st.error(f"❌ {error}")
135
- with st.expander("Show error details"):
136
- st.code(error)
137
- else:
138
- st.success("βœ… PDF generated successfully!")
139
-
140
- # Download button with PDF bytes
141
- output_filename = uploaded_file.name.replace('.html', '.pdf').replace('.htm', '.pdf')
142
- if not output_filename.endswith('.pdf'):
143
- output_filename += '.pdf'
144
-
145
- st.download_button(
146
- label="⬇️ Download PDF",
147
- data=pdf_bytes,
148
- file_name=output_filename,
149
- mime="application/pdf",
150
- use_container_width=True,
151
- key="download_file_pdf"
152
- )
153
- except Exception as e:
154
- st.error(f"❌ Error processing file: {str(e)}")
155
- with st.expander("Show full error"):
156
- import traceback
157
- st.code(traceback.format_exc())
158
 
159
  # Tab 2: Paste HTML Code
160
  with tab2:
@@ -177,63 +288,99 @@ with tab2:
177
  h1 {
178
  font-size: 48px;
179
  margin-bottom: 20px;
 
180
  }
181
  p {
182
  font-size: 18px;
183
  line-height: 1.6;
184
  }
 
 
 
 
 
 
185
  </style>
186
  </head>
187
  <body>
188
  <h1>Hello, PDF World! 🌍</h1>
189
  <p>This is a sample HTML document converted to PDF.</p>
190
- <p>You can customize this HTML with your own content!</p>
 
 
191
  </body>
192
  </html>""",
193
  height=400,
194
  key="html_code"
195
  )
196
 
197
- aspect_ratio_text = st.radio(
198
- "Aspect Ratio",
199
- options=["16:9", "1:1", "9:16"],
200
- index=0,
201
- key="aspect_text",
202
- help="Select the page orientation and dimensions"
203
- )
204
-
205
- convert_text_btn = st.button("πŸ”„ Convert to PDF", key="convert_text", type="primary", use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
  with col2:
208
- if convert_text_btn:
209
- if html_code and html_code.strip():
210
- with st.spinner("Converting HTML to PDF..."):
211
- # Convert to PDF
212
- pdf_bytes, error = convert_html_to_pdf(html_code, aspect_ratio_text)
213
-
214
- if error:
215
- st.error(error)
216
- else:
217
- st.success("βœ… PDF generated successfully!")
218
-
219
- # Download button with PDF bytes
220
- st.download_button(
221
- label="⬇️ Download PDF",
222
- data=pdf_bytes,
223
- file_name="converted.pdf",
224
- mime="application/pdf",
225
- use_container_width=True
226
- )
227
  else:
228
- st.warning("Please enter HTML content first.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
  # Footer with tips
231
  st.markdown("---")
232
  st.markdown("""
233
  ### πŸ’‘ Tips:
 
234
  - **16:9** - Best for presentations and landscape documents (297mm Γ— 210mm)
235
  - **1:1** - Square format (210mm Γ— 210mm)
236
  - **9:16** - Portrait format, standard A4 (210mm Γ— 297mm)
237
- - Include inline CSS for styling
 
238
  - External resources should use absolute URLs
239
  """)
 
4
  import tempfile
5
  import shutil
6
  from pathlib import Path
7
+ import base64
8
+ import re
9
 
10
  st.set_page_config(
11
  page_title="HTML to PDF Converter",
 
13
  layout="wide"
14
  )
15
 
16
def detect_aspect_ratio(html_content):
    """
    Detect aspect ratio from HTML content

    Heuristics are tried in order; the first one that produces an answer wins:
      1. a viewport meta tag that carries an ``orientation=`` hint,
      2. a CSS ``aspect-ratio: W / H`` declaration (integer operands only),
      3. presentation keywords (reveal.js, impress.js, slide, presentation),
      4. numeric ``width``/``height`` values in an inline ``<body style>``.

    Args:
        html_content: String containing HTML content

    Returns: "16:9", "1:1", or "9:16" ("9:16" when no heuristic matches)
    """
    # Check for viewport meta tag
    viewport_match = re.search(
        r'<meta[^>]*viewport[^>]*content=["\']([^"\']*)["\']',
        html_content,
        re.IGNORECASE,
    )
    if viewport_match:
        viewport = viewport_match.group(1).lower()
        if 'width=device-width' in viewport or 'width=100%' in viewport:
            # Check for orientation hints
            if 'orientation=portrait' in viewport:
                return "9:16"
            elif 'orientation=landscape' in viewport:
                return "16:9"

    # Check for CSS aspect-ratio property
    aspect_match = re.search(
        r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)', html_content, re.IGNORECASE
    )
    if aspect_match:
        width = int(aspect_match.group(1))
        height = int(aspect_match.group(2))
        # A zero height is a degenerate ratio: dividing would raise
        # ZeroDivisionError, so ignore the hint and try the next heuristic.
        if height != 0:
            ratio = width / height
            if ratio > 1.5:
                return "16:9"
            elif ratio < 0.7:
                return "9:16"
            else:
                return "1:1"

    # Check for common presentation frameworks
    lowered = html_content.lower()  # lower-case once, not once per keyword
    if any(keyword in lowered for keyword in ['reveal.js', 'impress.js', 'slide', 'presentation']):
        return "16:9"

    # Check body style for width/height hints
    body_match = re.search(r'<body[^>]*style=["\']([^"\']*)["\']', html_content, re.IGNORECASE)
    if body_match:
        style = body_match.group(1).lower()
        if 'width' in style and 'height' in style:
            width_match = re.search(r'width\s*:\s*(\d+)', style)
            height_match = re.search(r'height\s*:\s*(\d+)', style)
            if width_match and height_match:
                w = int(width_match.group(1))
                h = int(height_match.group(1))
                # Same zero guard as above; a near-square body intentionally
                # falls through to the default below.
                if h != 0:
                    ratio = w / h
                    if ratio > 1.5:
                        return "16:9"
                    elif ratio < 0.7:
                        return "9:16"

    # Default to A4 portrait for documents
    return "9:16"
67
+
68
def render_html_preview(html_content):
    """Render HTML preview in an iframe

    Args:
        html_content: String containing the HTML document to preview

    Returns:
        String with an ``<iframe>`` element whose ``src`` is a base64
        ``data:`` URL embedding the document.
    """
    # Encode HTML content as UTF-8 and say so in the data URL. Without an
    # explicit charset, a document that lacks its own <meta charset> may be
    # decoded with a legacy default and show garbled non-ASCII text.
    b64 = base64.b64encode(html_content.encode('utf-8')).decode('ascii')
    iframe_html = f'<iframe src="data:text/html;charset=utf-8;base64,{b64}" width="100%" height="600" style="border: 2px solid #ddd; border-radius: 5px;"></iframe>'
    return iframe_html
74
+
75
def render_pdf_preview(pdf_bytes):
    """Build the iframe markup that shows a PDF inline.

    Args:
        pdf_bytes: Bytes of the PDF document

    Returns:
        String with an ``<iframe>`` element whose ``src`` is a base64
        ``data:application/pdf`` URL embedding the document.
    """
    encoded_pdf = base64.b64encode(pdf_bytes).decode()
    # Assemble the tag piecewise; the joined result matches the single-line form.
    return (
        '<iframe src="data:application/pdf;base64,' + encoded_pdf + '"'
        ' width="100%" height="600"'
        ' style="border: 2px solid #ddd; border-radius: 5px;"'
        ' type="application/pdf"></iframe>'
    )
80
+
81
  def convert_html_to_pdf(html_content, aspect_ratio):
82
  """
83
+ Convert HTML content to PDF using Puppeteer with better styling preservation
84
 
85
  Args:
86
  html_content: String containing HTML content
 
94
  # Create temporary directory for processing
95
  temp_dir = tempfile.mkdtemp()
96
 
97
+ # Inject CSS to preserve styles better
98
+ style_injection = """
99
+ <style>
100
+ @page {
101
+ margin: 0;
102
+ }
103
+ * {
104
+ -webkit-print-color-adjust: exact !important;
105
+ print-color-adjust: exact !important;
106
+ color-adjust: exact !important;
107
+ }
108
+ body {
109
+ -webkit-print-color-adjust: exact !important;
110
+ print-color-adjust: exact !important;
111
+ }
112
+ </style>
113
+ """
114
+
115
+ # Insert style injection before closing head tag or at the start of body
116
+ if '</head>' in html_content:
117
+ html_content = html_content.replace('</head>', style_injection + '</head>')
118
+ elif '<body' in html_content:
119
+ html_content = html_content.replace('<body', style_injection + '<body', 1)
120
+ else:
121
+ html_content = style_injection + html_content
122
+
123
  # Save HTML content to temporary file
124
  html_file = os.path.join(temp_dir, "input.html")
125
  with open(html_file, 'w', encoding='utf-8') as f:
126
  f.write(html_content)
127
 
128
+ # Get the path to puppeteer_pdf.js
129
  script_dir = os.path.dirname(os.path.abspath(__file__))
130
  puppeteer_script = os.path.join(os.path.dirname(script_dir), 'puppeteer_pdf.js')
131
 
 
168
  # Page header
169
  st.title("πŸ“„ HTML to PDF Converter")
170
  st.markdown("""
171
+ Convert HTML files or HTML code to PDF using Puppeteer with automatic aspect ratio detection.
172
+ Preserves styles, fonts, colors, and layout.
173
  """)
174
 
175
  # Create tabs
 
177
 
178
  # Tab 1: Upload HTML File
179
  with tab1:
180
+ uploaded_file = st.file_uploader(
181
+ "Choose an HTML file",
182
+ type=['html', 'htm'],
183
+ key="file_uploader",
184
+ help="Upload an HTML file (max 200MB)",
185
+ accept_multiple_files=False
186
+ )
187
 
188
+ if uploaded_file is not None:
189
+ st.success(f"βœ… File uploaded: {uploaded_file.name} ({uploaded_file.size:,} bytes)")
 
 
 
 
 
 
190
 
191
+ # Read file content
192
+ uploaded_file.seek(0)
193
+ try:
194
+ html_content = uploaded_file.getvalue().decode('utf-8')
195
+ except UnicodeDecodeError:
196
+ uploaded_file.seek(0)
197
+ html_content = uploaded_file.getvalue().decode('latin-1')
198
 
199
+ # Auto-detect aspect ratio
200
+ detected_ratio = detect_aspect_ratio(html_content)
 
 
 
 
 
201
 
202
+ col1, col2 = st.columns([1, 1])
203
+
204
+ with col1:
205
+ st.subheader("βš™οΈ Settings")
206
+
207
+ auto_detect = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_file")
208
+
209
+ if auto_detect:
210
+ aspect_ratio_file = detected_ratio
211
+ st.info(f"πŸ” Detected: **{detected_ratio}**")
212
+ else:
213
+ aspect_ratio_file = st.radio(
214
+ "Aspect Ratio",
215
+ options=["16:9", "1:1", "9:16"],
216
+ index=["16:9", "1:1", "9:16"].index(detected_ratio),
217
+ key="aspect_file",
218
+ help="Select the page orientation and dimensions"
219
+ )
220
+
221
+ st.markdown(f"""
222
+ **Selected: {aspect_ratio_file}**
223
+ - 16:9 = Landscape (297mm Γ— 210mm)
224
+ - 1:1 = Square (210mm Γ— 210mm)
225
+ - 9:16 = Portrait (210mm Γ— 297mm)
226
+ """)
227
+
228
+ convert_file_btn = st.button("πŸ”„ Convert to PDF", key="convert_file", type="primary", use_container_width=True)
229
+
230
+ with col2:
231
+ st.subheader("πŸ‘οΈ HTML Preview")
232
+ with st.expander("Show HTML Preview", expanded=False):
233
+ st.components.v1.html(render_html_preview(html_content), height=600, scrolling=True)
234
+
235
+ # Conversion section
236
+ if convert_file_btn:
237
+ with st.spinner("Converting HTML to PDF..."):
238
+ pdf_bytes, error = convert_html_to_pdf(html_content, aspect_ratio_file)
239
+
240
+ if error:
241
+ st.error(f"❌ {error}")
242
+ with st.expander("Show error details"):
243
+ st.code(error)
244
+ else:
245
+ st.success("βœ… PDF generated successfully!")
246
 
247
+ col_a, col_b = st.columns([1, 1])
 
 
 
 
 
 
248
 
249
+ with col_a:
250
+ output_filename = uploaded_file.name.replace('.html', '.pdf').replace('.htm', '.pdf')
251
+ if not output_filename.endswith('.pdf'):
252
+ output_filename += '.pdf'
 
253
 
254
+ st.download_button(
255
+ label="⬇️ Download PDF",
256
+ data=pdf_bytes,
257
+ file_name=output_filename,
258
+ mime="application/pdf",
259
+ use_container_width=True,
260
+ key="download_file_pdf"
261
+ )
262
+
263
+ with col_b:
264
+ st.info(f"πŸ“¦ Size: {len(pdf_bytes):,} bytes")
265
+
266
+ # PDF Preview
267
+ st.subheader("πŸ“„ PDF Preview")
268
+ st.components.v1.html(render_pdf_preview(pdf_bytes), height=600, scrolling=True)
 
 
 
 
 
 
 
 
 
 
269
 
270
  # Tab 2: Paste HTML Code
271
  with tab2:
 
288
  h1 {
289
  font-size: 48px;
290
  margin-bottom: 20px;
291
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
292
  }
293
  p {
294
  font-size: 18px;
295
  line-height: 1.6;
296
  }
297
+ .box {
298
+ background: rgba(255,255,255,0.1);
299
+ padding: 20px;
300
+ border-radius: 10px;
301
+ margin-top: 20px;
302
+ }
303
  </style>
304
  </head>
305
  <body>
306
  <h1>Hello, PDF World! 🌍</h1>
307
  <p>This is a sample HTML document converted to PDF.</p>
308
+ <div class="box">
309
+ <p>✨ Styles, colors, and gradients are preserved!</p>
310
+ </div>
311
  </body>
312
  </html>""",
313
  height=400,
314
  key="html_code"
315
  )
316
 
317
+ if html_code and html_code.strip():
318
+ # Auto-detect aspect ratio
319
+ detected_ratio_text = detect_aspect_ratio(html_code)
320
+
321
+ auto_detect_text = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_text")
322
+
323
+ if auto_detect_text:
324
+ aspect_ratio_text = detected_ratio_text
325
+ st.info(f"πŸ” Detected: **{detected_ratio_text}**")
326
+ else:
327
+ aspect_ratio_text = st.radio(
328
+ "Aspect Ratio",
329
+ options=["16:9", "1:1", "9:16"],
330
+ index=["16:9", "1:1", "9:16"].index(detected_ratio_text),
331
+ key="aspect_text",
332
+ help="Select the page orientation and dimensions"
333
+ )
334
+
335
+ convert_text_btn = st.button("πŸ”„ Convert to PDF", key="convert_text", type="primary", use_container_width=True)
336
+ else:
337
+ convert_text_btn = False
338
 
339
  with col2:
340
+ if html_code and html_code.strip():
341
+ st.subheader("πŸ‘οΈ HTML Preview")
342
+ with st.expander("Show HTML Preview", expanded=False):
343
+ st.components.v1.html(render_html_preview(html_code), height=600, scrolling=True)
344
+
345
+ if convert_text_btn and html_code and html_code.strip():
346
+ with st.spinner("Converting HTML to PDF..."):
347
+ pdf_bytes, error = convert_html_to_pdf(html_code, aspect_ratio_text)
348
+
349
+ if error:
350
+ st.error(f"❌ {error}")
351
+ with st.expander("Show error details"):
352
+ st.code(error)
 
 
 
 
 
 
353
  else:
354
+ st.success("βœ… PDF generated successfully!")
355
+
356
+ col_a, col_b = st.columns([1, 1])
357
+
358
+ with col_a:
359
+ st.download_button(
360
+ label="⬇️ Download PDF",
361
+ data=pdf_bytes,
362
+ file_name="converted.pdf",
363
+ mime="application/pdf",
364
+ use_container_width=True,
365
+ key="download_text_pdf"
366
+ )
367
+
368
+ with col_b:
369
+ st.info(f"πŸ“¦ Size: {len(pdf_bytes):,} bytes")
370
+
371
+ # PDF Preview
372
+ st.subheader("πŸ“„ PDF Preview")
373
+ st.components.v1.html(render_pdf_preview(pdf_bytes), height=600, scrolling=True)
374
 
375
  # Footer with tips
376
  st.markdown("---")
377
  st.markdown("""
378
  ### πŸ’‘ Tips:
379
+ - **Auto-detection** analyzes your HTML to suggest the best aspect ratio
380
  - **16:9** - Best for presentations and landscape documents (297mm Γ— 210mm)
381
  - **1:1** - Square format (210mm Γ— 210mm)
382
  - **9:16** - Portrait format, standard A4 (210mm Γ— 297mm)
383
+ - All CSS styles, colors, gradients, and fonts are preserved
384
+ - Use inline CSS or `<style>` tags for best results
385
  - External resources should use absolute URLs
386
  """)