ABDALLALSWAITI commited on
Commit
430bb94
·
verified ·
1 Parent(s): 442c38f

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +173 -71
src/streamlit_app.py CHANGED
@@ -65,6 +65,40 @@ def detect_aspect_ratio(html_content):
65
  # Default to A4 portrait for documents
66
  return "9:16"
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  def render_html_preview(html_content):
69
  """Render HTML preview in an iframe"""
70
  # Encode HTML content
@@ -73,10 +107,9 @@ def render_html_preview(html_content):
73
  return iframe_html
74
 
75
  def render_pdf_preview(pdf_bytes):
76
- """Render PDF preview using embedded PDF.js for better browser compatibility"""
77
  b64 = base64.b64encode(pdf_bytes).decode()
78
 
79
- # Embed PDF.js directly to avoid I/O errors and CORS issues
80
  pdf_viewer_html = f'''
81
  <!DOCTYPE html>
82
  <html>
@@ -126,24 +159,20 @@ def render_pdf_preview(pdf_bytes):
126
 
127
  <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
128
  <script>
129
- // Set worker source
130
  pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
131
 
132
  const pdfData = atob('{b64}');
133
  const pdfContainer = document.getElementById('pdf-container');
134
  const loading = document.getElementById('loading');
135
 
136
- // Convert base64 to Uint8Array
137
  const uint8Array = new Uint8Array(pdfData.length);
138
  for (let i = 0; i < pdfData.length; i++) {{
139
  uint8Array[i] = pdfData.charCodeAt(i);
140
  }}
141
 
142
- // Load PDF
143
  pdfjsLib.getDocument({{data: uint8Array}}).promise.then(function(pdf) {{
144
  loading.style.display = 'none';
145
 
146
- // Render all pages
147
  const numPages = pdf.numPages;
148
  const promises = [];
149
 
@@ -179,22 +208,19 @@ def render_pdf_preview(pdf_bytes):
179
  '''
180
  return pdf_viewer_html
181
 
182
- def convert_html_to_pdf(html_content, aspect_ratio):
183
  """
184
  Convert HTML content to PDF using Puppeteer with better styling preservation
185
 
186
  Args:
187
  html_content: String containing HTML content
188
  aspect_ratio: One of "16:9", "1:1", or "9:16"
 
189
 
190
  Returns:
191
  Tuple of (pdf_bytes, error_message)
192
  """
193
- temp_dir = None
194
  try:
195
- # Create temporary directory for processing
196
- temp_dir = tempfile.mkdtemp()
197
-
198
  # Inject CSS to preserve styles better
199
  style_injection = """
200
  <style>
@@ -252,25 +278,18 @@ def convert_html_to_pdf(html_content, aspect_ratio):
252
  with open(pdf_file, 'rb') as f:
253
  pdf_bytes = f.read()
254
 
255
- # Clean up temporary directory
256
- shutil.rmtree(temp_dir, ignore_errors=True)
257
-
258
  return pdf_bytes, None
259
 
260
  except subprocess.TimeoutExpired:
261
- if temp_dir:
262
- shutil.rmtree(temp_dir, ignore_errors=True)
263
  return None, "Error: PDF conversion timed out (60 seconds)"
264
  except Exception as e:
265
- if temp_dir:
266
- shutil.rmtree(temp_dir, ignore_errors=True)
267
  return None, f"Error: {str(e)}"
268
 
269
  # Page header
270
  st.title("πŸ“„ HTML to PDF Converter")
271
  st.markdown("""
272
  Convert HTML files or HTML code to PDF using Puppeteer with automatic aspect ratio detection.
273
- Preserves styles, fonts, colors, and layout.
274
  """)
275
 
276
  # Create tabs
@@ -286,6 +305,23 @@ with tab1:
286
  accept_multiple_files=False
287
  )
288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  if uploaded_file is not None:
290
  st.success(f"βœ… File uploaded: {uploaded_file.name} ({uploaded_file.size:,} bytes)")
291
 
@@ -335,38 +371,58 @@ with tab1:
335
 
336
  # Conversion section
337
  if convert_file_btn:
338
- with st.spinner("Converting HTML to PDF..."):
339
- pdf_bytes, error = convert_html_to_pdf(html_content, aspect_ratio_file)
340
-
341
- if error:
342
- st.error(f"❌ {error}")
343
- with st.expander("Show error details"):
344
- st.code(error)
345
- else:
346
- st.success("βœ… PDF generated successfully!")
347
 
348
- col_a, col_b = st.columns([1, 1])
 
 
 
 
349
 
350
- with col_a:
351
- output_filename = uploaded_file.name.replace('.html', '.pdf').replace('.htm', '.pdf')
352
- if not output_filename.endswith('.pdf'):
353
- output_filename += '.pdf'
354
-
355
- st.download_button(
356
- label="⬇️ Download PDF",
357
- data=pdf_bytes,
358
- file_name=output_filename,
359
- mime="application/pdf",
360
- use_container_width=True,
361
- key="download_file_pdf"
362
- )
363
 
364
- with col_b:
365
- st.info(f"πŸ“¦ Size: {len(pdf_bytes):,} bytes")
 
366
 
367
- # PDF Preview
368
- st.subheader("πŸ“„ PDF Preview")
369
- st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
 
371
  # Tab 2: Paste HTML Code
372
  with tab2:
@@ -415,6 +471,23 @@ with tab2:
415
  key="html_code"
416
  )
417
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
  if html_code and html_code.strip():
419
  # Auto-detect aspect ratio
420
  detected_ratio_text = detect_aspect_ratio(html_code)
@@ -444,34 +517,55 @@ with tab2:
444
  st.components.v1.html(render_html_preview(html_code), height=600, scrolling=True)
445
 
446
  if convert_text_btn and html_code and html_code.strip():
447
- with st.spinner("Converting HTML to PDF..."):
448
- pdf_bytes, error = convert_html_to_pdf(html_code, aspect_ratio_text)
449
-
450
- if error:
451
- st.error(f"❌ {error}")
452
- with st.expander("Show error details"):
453
- st.code(error)
454
- else:
455
- st.success("βœ… PDF generated successfully!")
456
 
457
- col_a, col_b = st.columns([1, 1])
 
 
 
 
 
458
 
459
- with col_a:
460
- st.download_button(
461
- label="⬇️ Download PDF",
462
- data=pdf_bytes,
463
- file_name="converted.pdf",
464
- mime="application/pdf",
465
- use_container_width=True,
466
- key="download_text_pdf"
467
- )
468
 
469
- with col_b:
470
- st.info(f"πŸ“¦ Size: {len(pdf_bytes):,} bytes")
 
471
 
472
- # PDF Preview
473
- st.subheader("πŸ“„ PDF Preview")
474
- st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
475
 
476
  # Footer with tips
477
  st.markdown("---")
@@ -481,8 +575,16 @@ st.markdown("""
481
  - **16:9** - Best for presentations and landscape documents (297mm Γ— 210mm)
482
  - **1:1** - Square format (210mm Γ— 210mm)
483
  - **9:16** - Portrait format, standard A4 (210mm Γ— 297mm)
 
484
  - All CSS styles, colors, gradients, and fonts are preserved
485
  - Use inline CSS or `<style>` tags for best results
 
486
  - External resources should use absolute URLs
487
  - **PDF Preview** renders directly in the browser using PDF.js
 
 
 
 
 
 
488
  """)
 
65
  # Default to A4 portrait for documents
66
  return "9:16"
67
 
68
def save_uploaded_images(images, temp_dir):
    """Write uploaded images into ``<temp_dir>/images`` and map names to paths.

    Args:
        images: Iterable of uploaded-file objects exposing ``.name`` (the
            client-supplied filename) and ``.getvalue()`` (the raw bytes).
        temp_dir: Existing working directory; an ``images`` subdirectory is
            created inside it if missing.

    Returns:
        Dict mapping each upload's original ``.name`` to the relative path
        ``"images/<file name>"`` under which it was stored. Uploads whose
        name has no usable file component are skipped and get no entry.
    """
    image_mapping = {}
    images_dir = os.path.join(temp_dir, "images")
    os.makedirs(images_dir, exist_ok=True)

    for image in images:
        # The filename comes from the client, so keep only its final path
        # component: a name such as "../x.png" must not be able to write
        # outside images_dir. Backslashes are normalised first so that
        # Windows-style separators are stripped on every platform.
        safe_name = os.path.basename(image.name.replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            continue

        with open(os.path.join(images_dir, safe_name), "wb") as f:
            f.write(image.getvalue())

        # Keyed by the original name because that is what the HTML references.
        image_mapping[image.name] = f"images/{safe_name}"

    return image_mapping
84
+
85
def process_html_with_images(html_content, image_mapping):
    """Rewrite image references in HTML to point at their stored paths.

    For every ``original_name -> new_path`` pair, exact quoted references to
    the name are replaced with ``new_path``:

    * ``src`` and ``href`` attributes, in double or single quotes;
    * CSS ``url(...)`` references, unquoted or quoted.

    ``name`` and ``./name`` are treated as the same reference. Matching is
    plain, case-sensitive string replacement; anything else is left untouched.

    Args:
        html_content: HTML source as a string.
        image_mapping: Dict of original filename -> replacement path.

    Returns:
        The HTML string with matching references rewritten.
    """
    for original_name, new_path in image_mapping.items():
        # "./name" resolves to the same file as "name", so handle both.
        for ref in (original_name, f"./{original_name}"):
            for attr in ("src", "href"):
                for quote in ('"', "'"):
                    # str.replace is a no-op when absent; no pre-check needed.
                    html_content = html_content.replace(
                        f"{attr}={quote}{ref}{quote}",
                        f"{attr}={quote}{new_path}{quote}",
                    )
            # Inline / <style> CSS such as background-image: url(name).
            for quote in ("", '"', "'"):
                html_content = html_content.replace(
                    f"url({quote}{ref}{quote})",
                    f"url({quote}{new_path}{quote})",
                )

    return html_content
101
+
102
  def render_html_preview(html_content):
103
  """Render HTML preview in an iframe"""
104
  # Encode HTML content
 
107
  return iframe_html
108
 
109
  def render_pdf_preview(pdf_bytes):
110
+ """Render PDF preview using embedded PDF.js"""
111
  b64 = base64.b64encode(pdf_bytes).decode()
112
 
 
113
  pdf_viewer_html = f'''
114
  <!DOCTYPE html>
115
  <html>
 
159
 
160
  <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
161
  <script>
 
162
  pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
163
 
164
  const pdfData = atob('{b64}');
165
  const pdfContainer = document.getElementById('pdf-container');
166
  const loading = document.getElementById('loading');
167
 
 
168
  const uint8Array = new Uint8Array(pdfData.length);
169
  for (let i = 0; i < pdfData.length; i++) {{
170
  uint8Array[i] = pdfData.charCodeAt(i);
171
  }}
172
 
 
173
  pdfjsLib.getDocument({{data: uint8Array}}).promise.then(function(pdf) {{
174
  loading.style.display = 'none';
175
 
 
176
  const numPages = pdf.numPages;
177
  const promises = [];
178
 
 
208
  '''
209
  return pdf_viewer_html
210
 
211
+ def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
212
  """
213
  Convert HTML content to PDF using Puppeteer with better styling preservation
214
 
215
  Args:
216
  html_content: String containing HTML content
217
  aspect_ratio: One of "16:9", "1:1", or "9:16"
218
+ temp_dir: Temporary directory for processing
219
 
220
  Returns:
221
  Tuple of (pdf_bytes, error_message)
222
  """
 
223
  try:
 
 
 
224
  # Inject CSS to preserve styles better
225
  style_injection = """
226
  <style>
 
278
  with open(pdf_file, 'rb') as f:
279
  pdf_bytes = f.read()
280
 
 
 
 
281
  return pdf_bytes, None
282
 
283
  except subprocess.TimeoutExpired:
 
 
284
  return None, "Error: PDF conversion timed out (60 seconds)"
285
  except Exception as e:
 
 
286
  return None, f"Error: {str(e)}"
287
 
288
  # Page header
289
  st.title("πŸ“„ HTML to PDF Converter")
290
  st.markdown("""
291
  Convert HTML files or HTML code to PDF using Puppeteer with automatic aspect ratio detection.
292
+ ✨ **NEW:** Upload images alongside your HTML files!
293
  """)
294
 
295
  # Create tabs
 
305
  accept_multiple_files=False
306
  )
307
 
308
+ # Image uploader
309
+ uploaded_images = st.file_uploader(
310
+ "πŸ“· Upload Images (optional)",
311
+ type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
312
+ key="image_uploader",
313
+ help="Upload images referenced in your HTML",
314
+ accept_multiple_files=True
315
+ )
316
+
317
+ if uploaded_images:
318
+ st.success(f"βœ… {len(uploaded_images)} image(s) uploaded")
319
+ with st.expander("View uploaded images"):
320
+ cols = st.columns(min(len(uploaded_images), 4))
321
+ for idx, img in enumerate(uploaded_images):
322
+ with cols[idx % 4]:
323
+ st.image(img, caption=img.name, use_container_width=True)
324
+
325
  if uploaded_file is not None:
326
  st.success(f"βœ… File uploaded: {uploaded_file.name} ({uploaded_file.size:,} bytes)")
327
 
 
371
 
372
  # Conversion section
373
  if convert_file_btn:
374
+ temp_dir = None
375
+ try:
376
+ with st.spinner("Converting HTML to PDF..."):
377
+ # Create temp directory
378
+ temp_dir = tempfile.mkdtemp()
 
 
 
 
379
 
380
+ # Process images if uploaded
381
+ if uploaded_images:
382
+ image_mapping = save_uploaded_images(uploaded_images, temp_dir)
383
+ html_content = process_html_with_images(html_content, image_mapping)
384
+ st.info(f"πŸ“· Processed {len(uploaded_images)} image(s)")
385
 
386
+ # Convert to PDF
387
+ pdf_bytes, error = convert_html_to_pdf(html_content, aspect_ratio_file, temp_dir)
 
 
 
 
 
 
 
 
 
 
 
388
 
389
+ # Cleanup
390
+ if temp_dir:
391
+ shutil.rmtree(temp_dir, ignore_errors=True)
392
 
393
+ if error:
394
+ st.error(f"❌ {error}")
395
+ with st.expander("Show error details"):
396
+ st.code(error)
397
+ else:
398
+ st.success("βœ… PDF generated successfully!")
399
+
400
+ col_a, col_b = st.columns([1, 1])
401
+
402
+ with col_a:
403
+ output_filename = uploaded_file.name.replace('.html', '.pdf').replace('.htm', '.pdf')
404
+ if not output_filename.endswith('.pdf'):
405
+ output_filename += '.pdf'
406
+
407
+ st.download_button(
408
+ label="⬇️ Download PDF",
409
+ data=pdf_bytes,
410
+ file_name=output_filename,
411
+ mime="application/pdf",
412
+ use_container_width=True,
413
+ key="download_file_pdf"
414
+ )
415
+
416
+ with col_b:
417
+ st.info(f"πŸ“¦ Size: {len(pdf_bytes):,} bytes")
418
+
419
+ # PDF Preview
420
+ st.subheader("πŸ“„ PDF Preview")
421
+ st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
422
+ except Exception as e:
423
+ if temp_dir:
424
+ shutil.rmtree(temp_dir, ignore_errors=True)
425
+ st.error(f"❌ Error: {str(e)}")
426
 
427
  # Tab 2: Paste HTML Code
428
  with tab2:
 
471
  key="html_code"
472
  )
473
 
474
+ # Image uploader for text tab
475
+ uploaded_images_text = st.file_uploader(
476
+ "πŸ“· Upload Images (optional)",
477
+ type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
478
+ key="image_uploader_text",
479
+ help="Upload images referenced in your HTML code",
480
+ accept_multiple_files=True
481
+ )
482
+
483
+ if uploaded_images_text:
484
+ st.success(f"βœ… {len(uploaded_images_text)} image(s) uploaded")
485
+ with st.expander("View uploaded images"):
486
+ cols = st.columns(min(len(uploaded_images_text), 4))
487
+ for idx, img in enumerate(uploaded_images_text):
488
+ with cols[idx % 4]:
489
+ st.image(img, caption=img.name, use_container_width=True)
490
+
491
  if html_code and html_code.strip():
492
  # Auto-detect aspect ratio
493
  detected_ratio_text = detect_aspect_ratio(html_code)
 
517
  st.components.v1.html(render_html_preview(html_code), height=600, scrolling=True)
518
 
519
  if convert_text_btn and html_code and html_code.strip():
520
+ temp_dir = None
521
+ try:
522
+ with st.spinner("Converting HTML to PDF..."):
523
+ # Create temp directory
524
+ temp_dir = tempfile.mkdtemp()
 
 
 
 
525
 
526
+ # Process images if uploaded
527
+ processed_html = html_code
528
+ if uploaded_images_text:
529
+ image_mapping = save_uploaded_images(uploaded_images_text, temp_dir)
530
+ processed_html = process_html_with_images(html_code, image_mapping)
531
+ st.info(f"πŸ“· Processed {len(uploaded_images_text)} image(s)")
532
 
533
+ # Convert to PDF
534
+ pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_text, temp_dir)
 
 
 
 
 
 
 
535
 
536
+ # Cleanup
537
+ if temp_dir:
538
+ shutil.rmtree(temp_dir, ignore_errors=True)
539
 
540
+ if error:
541
+ st.error(f"❌ {error}")
542
+ with st.expander("Show error details"):
543
+ st.code(error)
544
+ else:
545
+ st.success("βœ… PDF generated successfully!")
546
+
547
+ col_a, col_b = st.columns([1, 1])
548
+
549
+ with col_a:
550
+ st.download_button(
551
+ label="⬇️ Download PDF",
552
+ data=pdf_bytes,
553
+ file_name="converted.pdf",
554
+ mime="application/pdf",
555
+ use_container_width=True,
556
+ key="download_text_pdf"
557
+ )
558
+
559
+ with col_b:
560
+ st.info(f"πŸ“¦ Size: {len(pdf_bytes):,} bytes")
561
+
562
+ # PDF Preview
563
+ st.subheader("πŸ“„ PDF Preview")
564
+ st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
565
+ except Exception as e:
566
+ if temp_dir:
567
+ shutil.rmtree(temp_dir, ignore_errors=True)
568
+ st.error(f"❌ Error: {str(e)}")
569
 
570
  # Footer with tips
571
  st.markdown("---")
 
575
  - **16:9** - Best for presentations and landscape documents (297mm Γ— 210mm)
576
  - **1:1** - Square format (210mm Γ— 210mm)
577
  - **9:16** - Portrait format, standard A4 (210mm Γ— 297mm)
578
+ - **Image Support** - Upload JPG, PNG, GIF, SVG, WebP, or BMP images
579
  - All CSS styles, colors, gradients, and fonts are preserved
580
  - Use inline CSS or `<style>` tags for best results
581
+ - Reference images by filename in your HTML (e.g., `<img src="image.jpg">`)
582
  - External resources should use absolute URLs
583
  - **PDF Preview** renders directly in the browser using PDF.js
584
+
585
+ ### πŸ–ΌοΈ Using Images:
586
+ 1. Upload your HTML file
587
+ 2. Upload all images referenced in the HTML
588
+ 3. Make sure image filenames in HTML match uploaded files exactly
589
+ 4. The converter will automatically embed images in the PDF
590
  """)