import streamlit as st import subprocess import os import tempfile import shutil from pathlib import Path import base64 import re st.set_page_config( page_title="HTML to PDF Converter", page_icon="📄", layout="wide" ) def detect_aspect_ratio(html_content): """ Detect aspect ratio from HTML content Returns: "16:9", "1:1", or "9:16" """ # Check for viewport meta tag viewport_match = re.search(r']*viewport[^>]*content=["\']([^"\']*)["\']', html_content, re.IGNORECASE) if viewport_match: viewport = viewport_match.group(1).lower() if 'width=device-width' in viewport or 'width=100%' in viewport: # Check for orientation hints if 'orientation=portrait' in viewport: return "9:16" elif 'orientation=landscape' in viewport: return "16:9" # Check for CSS aspect-ratio property aspect_match = re.search(r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)', html_content, re.IGNORECASE) if aspect_match: width = int(aspect_match.group(1)) height = int(aspect_match.group(2)) ratio = width / height if ratio > 1.5: return "16:9" elif ratio < 0.7: return "9:16" else: return "1:1" # Check for common presentation frameworks if any(keyword in html_content.lower() for keyword in ['reveal.js', 'impress.js', 'slide', 'presentation']): return "16:9" # Check body style for width/height hints body_match = re.search(r']*style=["\']([^"\']*)["\']', html_content, re.IGNORECASE) if body_match: style = body_match.group(1).lower() if 'width' in style and 'height' in style: width_match = re.search(r'width\s*:\s*(\d+)', style) height_match = re.search(r'height\s*:\s*(\d+)', style) if width_match and height_match: w = int(width_match.group(1)) h = int(height_match.group(1)) ratio = w / h if ratio > 1.5: return "16:9" elif ratio < 0.7: return "9:16" # Default to A4 portrait for documents return "9:16" def save_uploaded_images(images, temp_dir): """Save uploaded images and return mapping""" image_mapping = {} images_dir = os.path.join(temp_dir, "images") os.makedirs(images_dir, exist_ok=True) for image in images: # Save image image_path = os.path.join(images_dir, image.name) with open(image_path, 'wb') as f: f.write(image.getvalue()) # Create mapping image_mapping[image.name] = f"images/{image.name}" print(f"Saved image: {image.name} -> {image_path}") return image_mapping def process_html_with_images(html_content, temp_dir, image_mapping): """Process HTML to handle image references with absolute file paths""" import re for original_name, relative_path in image_mapping.items(): # Get absolute path for the image absolute_path = os.path.abspath(os.path.join(temp_dir, relative_path)) file_url = f"file://{absolute_path}" # Replace various image reference patterns # Pattern 1: src="filename" or src='filename' html_content = re.sub( rf'src=["\'](?:\./)?{re.escape(original_name)}["\']', f'src="{file_url}"', html_content, flags=re.IGNORECASE ) # Pattern 2: background-image: url(filename) html_content = re.sub( rf'url\(["\']?(?:\./)?{re.escape(original_name)}["\']?\)', f'url("{file_url}")', html_content, flags=re.IGNORECASE ) # Pattern 3: href for links html_content = re.sub( rf'href=["\'](?:\./)?{re.escape(original_name)}["\']', f'href="{file_url}"', html_content, flags=re.IGNORECASE ) return html_content def render_html_preview(html_content): """Render HTML preview in an iframe""" # Encode HTML content b64 = base64.b64encode(html_content.encode()).decode() iframe_html = f'' return iframe_html def render_pdf_preview(pdf_bytes): """Render PDF preview using embedded PDF.js""" b64 = base64.b64encode(pdf_bytes).decode() pdf_viewer_html = f'''
Loading PDF...
''' return pdf_viewer_html def convert_html_to_pdf(html_content, aspect_ratio, temp_dir): """ Convert HTML content to PDF using Puppeteer with better styling preservation Args: html_content: String containing HTML content aspect_ratio: One of "16:9", "1:1", or "9:16" temp_dir: Temporary directory for processing Returns: Tuple of (pdf_bytes, error_message) """ try: # Inject CSS to preserve styles better style_injection = """ """ # Insert style injection before closing head tag or at the start of body if '' in html_content: html_content = html_content.replace('', style_injection + '') elif ' {new}") full_path = os.path.join(temp_dir, new) st.text(f"Full path: {full_path}") st.text(f"Exists: {os.path.exists(full_path)}") # Convert to PDF pdf_bytes, error = convert_html_to_pdf(html_content, aspect_ratio_file, temp_dir) # Cleanup if temp_dir: shutil.rmtree(temp_dir, ignore_errors=True) if error: st.error(f"❌ {error}") with st.expander("Show error details"): st.code(error) else: st.success("✅ PDF generated successfully!") col_a, col_b = st.columns([1, 1]) with col_a: output_filename = uploaded_file.name.replace('.html', '.pdf').replace('.htm', '.pdf') if not output_filename.endswith('.pdf'): output_filename += '.pdf' st.download_button( label="⬇️ Download PDF", data=pdf_bytes, file_name=output_filename, mime="application/pdf", width="stretch", key="download_file_pdf" ) with col_b: st.info(f"📦 Size: {len(pdf_bytes):,} bytes") # PDF Preview st.subheader("📄 PDF Preview") st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True) except Exception as e: if temp_dir: shutil.rmtree(temp_dir, ignore_errors=True) st.error(f"❌ Error: {str(e)}") # Tab 2: Paste HTML Code with tab2: col1, col2 = st.columns([1, 1]) with col1: html_code = st.text_area( "HTML Content", value=""" Sample Document

Hello, PDF World! 🌍

This is a sample HTML document converted to PDF.

✨ Styles, colors, and gradients are preserved!

""", height=400, key="html_code" ) # Image uploader for text tab uploaded_images_text = st.file_uploader( "📷 Upload Images (optional)", type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'], key="image_uploader_text", help="Upload images referenced in your HTML code", accept_multiple_files=True ) if uploaded_images_text: st.success(f"✅ {len(uploaded_images_text)} image(s) uploaded") with st.expander("View uploaded images"): cols = st.columns(min(len(uploaded_images_text), 4)) for idx, img in enumerate(uploaded_images_text): with cols[idx % 4]: st.image(img, caption=img.name, use_container_width=True) if html_code and html_code.strip(): # Auto-detect aspect ratio detected_ratio_text = detect_aspect_ratio(html_code) auto_detect_text = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_text") if auto_detect_text: aspect_ratio_text = detected_ratio_text st.info(f"🔍 Detected: **{detected_ratio_text}**") else: aspect_ratio_text = st.radio( "Aspect Ratio", options=["16:9", "1:1", "9:16"], index=["16:9", "1:1", "9:16"].index(detected_ratio_text), key="aspect_text", help="Select the page orientation and dimensions" ) convert_text_btn = st.button("🔄 Convert to PDF", key="convert_text", type="primary", width="stretch") else: convert_text_btn = False with col2: if html_code and html_code.strip(): st.subheader("👁️ HTML Preview") with st.expander("Show HTML Preview", expanded=False): st.components.v1.html(render_html_preview(html_code), height=600, scrolling=True) if convert_text_btn and html_code and html_code.strip(): temp_dir = None try: with st.spinner("Converting HTML to PDF..."): # Create temp directory temp_dir = tempfile.mkdtemp() # Process images if uploaded processed_html = html_code if uploaded_images_text: image_mapping = save_uploaded_images(uploaded_images_text, temp_dir) processed_html = process_html_with_images(html_code, temp_dir, image_mapping) st.info(f"📷 Processed {len(uploaded_images_text)} image(s)") # Debug info with st.expander("🔍 Debug: Image Mapping"): for orig, new in image_mapping.items(): st.text(f"{orig} -> {new}") full_path = os.path.join(temp_dir, new) st.text(f"Full path: {full_path}") st.text(f"Exists: {os.path.exists(full_path)}") # Convert to PDF pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_text, temp_dir) # Cleanup if temp_dir: shutil.rmtree(temp_dir, ignore_errors=True) if error: st.error(f"❌ {error}") with st.expander("Show error details"): st.code(error) else: st.success("✅ PDF generated successfully!") col_a, col_b = st.columns([1, 1]) with col_a: st.download_button( label="⬇️ Download PDF", data=pdf_bytes, file_name="converted.pdf", mime="application/pdf", width="stretch", key="download_text_pdf" ) with col_b: st.info(f"📦 Size: {len(pdf_bytes):,} bytes") # PDF Preview st.subheader("📄 PDF Preview") st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True) except Exception as e: if temp_dir: shutil.rmtree(temp_dir, ignore_errors=True) st.error(f"❌ Error: {str(e)}") # Footer with tips st.markdown("---") st.markdown(""" ### 💡 Tips: - **Auto-detection** analyzes your HTML to suggest the best aspect ratio - **16:9** - Best for presentations and landscape documents (297mm × 210mm) - **1:1** - Square format (210mm × 210mm) - **9:16** - Portrait format, standard A4 (210mm × 297mm) - **Image Support** - Upload JPG, PNG, GIF, SVG, WebP, or BMP images - All CSS styles, colors, gradients, and fonts are preserved - Use inline CSS or `