Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import subprocess | |
| import os | |
| import tempfile | |
| import shutil | |
| from pathlib import Path | |
| import base64 | |
| import re | |
# Configure the browser tab title/icon and use the full page width.
_PAGE_CONFIG = {
    "page_title": "HTML to PDF Converter",
    "page_icon": "π",
    "layout": "wide",
}
st.set_page_config(**_PAGE_CONFIG)
def detect_aspect_ratio(html_content):
    """
    Guess a page aspect ratio from hints found in an HTML document.

    Hints are checked in this order, and the first one that yields an
    answer wins:
      1. a viewport ``<meta>`` tag carrying an ``orientation=`` hint,
      2. the first CSS ``aspect-ratio: W / H`` declaration,
      3. presentation-related keywords anywhere in the document,
      4. ``width``/``height`` numbers in the ``<body>`` inline style.

    Args:
        html_content: HTML source as a string.

    Returns:
        "16:9", "1:1", or "9:16". Falls back to "9:16" (portrait) when no
        hint is conclusive.
    """
    # Check for viewport meta tag
    viewport_match = re.search(
        r'<meta[^>]*viewport[^>]*content=["\']([^"\']*)["\']',
        html_content,
        re.IGNORECASE,
    )
    if viewport_match:
        viewport = viewport_match.group(1).lower()
        if 'width=device-width' in viewport or 'width=100%' in viewport:
            # Check for orientation hints
            if 'orientation=portrait' in viewport:
                return "9:16"
            if 'orientation=landscape' in viewport:
                return "16:9"

    # Check for CSS aspect-ratio property
    aspect_match = re.search(
        r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)',
        html_content,
        re.IGNORECASE,
    )
    if aspect_match:
        width = int(aspect_match.group(1))
        height = int(aspect_match.group(2))
        # A zero height (e.g. "16 / 0") carries no usable ratio; skip this
        # hint instead of dividing by zero.
        if height:
            ratio = width / height
            if ratio > 1.5:
                return "16:9"
            if ratio < 0.7:
                return "9:16"
            return "1:1"

    # Check for common presentation frameworks
    lowered = html_content.lower()
    if any(keyword in lowered for keyword in ('reveal.js', 'impress.js', 'slide', 'presentation')):
        return "16:9"

    # Check body style for width/height hints
    body_match = re.search(
        r'<body[^>]*style=["\']([^"\']*)["\']',
        html_content,
        re.IGNORECASE,
    )
    if body_match:
        style = body_match.group(1).lower()
        if 'width' in style and 'height' in style:
            width_match = re.search(r'width\s*:\s*(\d+)', style)
            height_match = re.search(r'height\s*:\s*(\d+)', style)
            if width_match and height_match:
                w = int(width_match.group(1))
                h = int(height_match.group(1))
                # Same zero guard as above ("height: 0").
                if h:
                    ratio = w / h
                    if ratio > 1.5:
                        return "16:9"
                    if ratio < 0.7:
                        return "9:16"
                    # In-between ratios intentionally fall through to the default.

    # Default to A4 portrait for documents
    return "9:16"
def save_uploaded_images(images, temp_dir):
    """Write uploaded images into ``<temp_dir>/images`` and return a name mapping.

    Args:
        images: Iterable of uploaded-file objects exposing ``name`` and
            ``getvalue()`` (the latter returning the file's bytes).
        temp_dir: Directory under which the ``images`` folder is created.

    Returns:
        Dict mapping each upload's original ``name`` to the path of the saved
        file relative to ``temp_dir`` (``images/<file name>``).
    """
    image_mapping = {}
    images_dir = os.path.join(temp_dir, "images")
    os.makedirs(images_dir, exist_ok=True)
    for image in images:
        # The name comes from the client, so keep only its final component:
        # a name such as "../x.png" must not escape the images directory.
        safe_name = os.path.basename(image.name.replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            # Nothing usable as a file name; skip rather than fail the batch.
            continue
        # Save image
        image_path = os.path.join(images_dir, safe_name)
        with open(image_path, 'wb') as f:
            f.write(image.getvalue())
        # Key by the original name so HTML references can still be matched.
        image_mapping[image.name] = f"images/{safe_name}"
        print(f"Saved image: {image.name} -> {image_path}")
    return image_mapping
def process_html_with_images(html_content, temp_dir, image_mapping):
    """Rewrite image references in the HTML to absolute ``file://`` URLs.

    For every entry in ``image_mapping`` the bare file name (optionally
    prefixed with ``./``) is replaced, case-insensitively, in three places:
    ``src="..."`` attributes, CSS ``url(...)`` values and ``href="..."``
    attributes.

    Args:
        html_content: HTML source as a string.
        temp_dir: Directory that the mapping's relative paths are resolved against.
        image_mapping: Dict of original file name -> path relative to ``temp_dir``.

    Returns:
        The HTML with matching references pointing at the saved files.
    """
    for original_name, relative_path in image_mapping.items():
        # Get absolute path for the image
        absolute_path = os.path.abspath(os.path.join(temp_dir, relative_path))
        # as_uri() percent-encodes spaces and other special characters and
        # produces a well-formed URL on every platform.
        file_url = Path(absolute_path).as_uri()
        name_pattern = rf'(?:\./)?{re.escape(original_name)}'
        replacements = (
            # Pattern 1: src="filename" or src='filename'
            (rf'src=["\']{name_pattern}["\']', f'src="{file_url}"'),
            # Pattern 2: background-image: url(filename)
            (rf'url\(["\']?{name_pattern}["\']?\)', f'url("{file_url}")'),
            # Pattern 3: href for links
            (rf'href=["\']{name_pattern}["\']', f'href="{file_url}"'),
        )
        for pattern, replacement in replacements:
            # A callable replacement is inserted literally; a plain string
            # would have its backslashes interpreted as regex escapes.
            html_content = re.sub(
                pattern,
                lambda _match, text=replacement: text,
                html_content,
                flags=re.IGNORECASE,
            )
    return html_content
def render_html_preview(html_content):
    """Build an ``<iframe>`` tag that shows the HTML through a base64 data URL.

    Args:
        html_content: HTML source as a string.

    Returns:
        A string containing a single ``<iframe>`` element.
    """
    # Encode HTML content. The data URL declares charset=utf-8 to match the
    # bytes produced here; without it a document lacking its own
    # <meta charset> has its non-ASCII text decoded with the wrong charset.
    b64 = base64.b64encode(html_content.encode("utf-8")).decode("ascii")
    return (
        f'<iframe src="data:text/html;charset=utf-8;base64,{b64}" '
        'width="100%" height="600" '
        'style="border: 2px solid #ddd; border-radius: 5px;"></iframe>'
    )
def render_pdf_preview(pdf_bytes):
    """Build a standalone HTML page that renders a PDF with PDF.js.

    The PDF is embedded in the page as base64 and decoded in the browser;
    PDF.js itself is loaded from the cdnjs CDN. Each page is drawn onto its
    own ``<canvas>`` inside a scrollable container.

    Args:
        pdf_bytes: The PDF document as bytes.

    Returns:
        The HTML document as a string.
    """
    b64 = base64.b64encode(pdf_bytes).decode()
    pdf_viewer_html = f'''
<!DOCTYPE html>
<html>
<head>
<style>
body {{
    margin: 0;
    padding: 0;
    overflow: hidden;
    background: #525659;
}}
#pdf-container {{
    width: 100%;
    height: 100vh;
    overflow: auto;
    display: flex;
    flex-direction: column;
    align-items: center;
    padding: 20px;
    box-sizing: border-box;
}}
canvas {{
    box-shadow: 0 2px 8px rgba(0,0,0,0.3);
    margin-bottom: 10px;
    background: white;
}}
#loading {{
    color: white;
    font-family: Arial, sans-serif;
    font-size: 18px;
    padding: 20px;
}}
.error {{
    color: #ff6b6b;
    font-family: Arial, sans-serif;
    padding: 20px;
    background: rgba(0,0,0,0.5);
    border-radius: 5px;
    margin: 20px;
}}
</style>
</head>
<body>
<div id="pdf-container">
    <div id="loading">Loading PDF...</div>
</div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
<script>
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
const pdfData = atob('{b64}');
const pdfContainer = document.getElementById('pdf-container');
const loading = document.getElementById('loading');
const uint8Array = new Uint8Array(pdfData.length);
for (let i = 0; i < pdfData.length; i++) {{
    uint8Array[i] = pdfData.charCodeAt(i);
}}
pdfjsLib.getDocument({{data: uint8Array}}).promise.then(function(pdf) {{
    loading.style.display = 'none';
    const numPages = pdf.numPages;
    const promises = [];
    for (let pageNum = 1; pageNum <= numPages; pageNum++) {{
        // Append the canvas up front so pages keep document order even if
        // the getPage() promises resolve out of order.
        const canvas = document.createElement('canvas');
        pdfContainer.appendChild(canvas);
        promises.push(
            pdf.getPage(pageNum).then(function(page) {{
                const scale = 1.5;
                const viewport = page.getViewport({{scale: scale}});
                const context = canvas.getContext('2d');
                canvas.height = viewport.height;
                canvas.width = viewport.width;
                return page.render({{
                    canvasContext: context,
                    viewport: viewport
                }}).promise;
            }})
        );
    }}
    return Promise.all(promises);
}}).catch(function(error) {{
    // Build the message with textContent so it is never parsed as markup,
    // and un-hide the box in case the failure happened after loading began.
    const box = document.createElement('div');
    box.className = 'error';
    box.textContent = 'Error loading PDF: ' + error.message;
    loading.textContent = '';
    loading.appendChild(box);
    loading.style.display = 'block';
    console.error('Error loading PDF:', error);
}});
</script>
</body>
</html>
'''
    return pdf_viewer_html
def _inject_print_styles(html_content):
    """Insert print CSS (zero page margin, exact colours) into the HTML once."""
    style_injection = """
<style>
@page {
    margin: 0;
}
* {
    -webkit-print-color-adjust: exact !important;
    print-color-adjust: exact !important;
    color-adjust: exact !important;
}
body {
    -webkit-print-color-adjust: exact !important;
    print-color-adjust: exact !important;
}
</style>
"""
    # Prefer just before the first closing head tag, then just before the
    # first body tag. Matching is case-insensitive and limited to one
    # occurrence, so a "</head>" inside a script string is not touched.
    for pattern in (r'</head\s*>', r'<body\b'):
        html_content, injected = re.subn(
            pattern,
            lambda match: style_injection + match.group(0),
            html_content,
            count=1,
            flags=re.IGNORECASE,
        )
        if injected:
            return html_content
    return style_injection + html_content


def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
    """
    Convert HTML content to PDF using Puppeteer with better styling preservation

    The HTML is written to ``<temp_dir>/input.html`` and handed to the
    ``puppeteer_pdf.js`` Node script located one directory above this file.

    Args:
        html_content: String containing HTML content
        aspect_ratio: One of "16:9", "1:1", or "9:16"
        temp_dir: Temporary directory for processing
    Returns:
        Tuple of (pdf_bytes, error_message); exactly one of the two is None.
        Failures are reported through error_message rather than raised.
    """
    try:
        # Inject CSS to preserve styles better
        html_content = _inject_print_styles(html_content)

        # Save HTML content to temporary file
        html_file = os.path.join(temp_dir, "input.html")
        with open(html_file, 'w', encoding='utf-8') as f:
            f.write(html_content)

        # Get the path to puppeteer_pdf.js
        script_dir = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(script_dir)
        puppeteer_script = os.path.join(project_root, 'puppeteer_pdf.js')

        # Run Node.js script to convert HTML to PDF
        result = subprocess.run(
            ['node', puppeteer_script, html_file, aspect_ratio],
            capture_output=True,
            text=True,
            timeout=60,
            cwd=project_root,
        )
        if result.returncode != 0:
            return None, f"PDF conversion failed: {result.stderr}"

        # Get the generated PDF path. Only the extension is swapped, so a
        # ".html" elsewhere in temp_dir is left alone.
        # NOTE(review): assumes the script writes input.pdf next to
        # input.html - confirm against puppeteer_pdf.js.
        pdf_file = os.path.splitext(html_file)[0] + '.pdf'
        if not os.path.exists(pdf_file):
            return None, "PDF file was not generated"

        # Read PDF file into memory
        with open(pdf_file, 'rb') as f:
            pdf_bytes = f.read()
        return pdf_bytes, None
    except subprocess.TimeoutExpired:
        return None, "Error: PDF conversion timed out (60 seconds)"
    except Exception as e:
        # Deliberate catch-all: callers expect an error message, not an exception.
        return None, f"Error: {str(e)}"
# Page header: title plus a short introduction.
_INTRO_MARKDOWN = """
Convert HTML files or HTML code to PDF using Puppeteer with automatic aspect ratio detection.
β¨ **NEW:** Upload images alongside your HTML files!
"""
st.title("π HTML to PDF Converter")
st.markdown(_INTRO_MARKDOWN)

# Two input modes, one tab each: upload a file or paste the markup.
_TAB_LABELS = ["π€ Upload HTML File", "π Paste HTML Code"]
tab1, tab2 = st.tabs(_TAB_LABELS)
# Tab 1: Upload HTML File
# Flow: upload an HTML file (plus optional images), pick or auto-detect the
# aspect ratio, convert on button press, then offer download and preview.
with tab1:
    uploaded_file = st.file_uploader(
        "Choose an HTML file",
        type=['html', 'htm'],
        key="file_uploader",
        help="Upload an HTML file (max 200MB)",
        accept_multiple_files=False
    )
    # Image uploader
    uploaded_images = st.file_uploader(
        "π· Upload Images (optional)",
        type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
        key="image_uploader",
        help="Upload images referenced in your HTML",
        accept_multiple_files=True
    )
    if uploaded_images:
        st.success(f"β {len(uploaded_images)} image(s) uploaded")
        with st.expander("View uploaded images"):
            # At most four thumbnails per row; extra images wrap around.
            cols = st.columns(min(len(uploaded_images), 4))
            for idx, img in enumerate(uploaded_images):
                with cols[idx % 4]:
                    st.image(img, caption=img.name, use_container_width=True)

    if uploaded_file is not None:
        st.success(f"β File uploaded: {uploaded_file.name} ({uploaded_file.size:,} bytes)")
        # Read file content; fall back to latin-1 when it is not valid UTF-8.
        uploaded_file.seek(0)
        try:
            html_content = uploaded_file.getvalue().decode('utf-8')
        except UnicodeDecodeError:
            uploaded_file.seek(0)
            html_content = uploaded_file.getvalue().decode('latin-1')

        # Auto-detect aspect ratio
        detected_ratio = detect_aspect_ratio(html_content)
        ratio_options = ["16:9", "1:1", "9:16"]

        col1, col2 = st.columns([1, 1])
        with col1:
            st.subheader("βοΈ Settings")
            auto_detect = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_file")
            if auto_detect:
                aspect_ratio_file = detected_ratio
                st.info(f"π Detected: **{detected_ratio}**")
            else:
                # Manual choice, pre-selected to the detected value.
                aspect_ratio_file = st.radio(
                    "Aspect Ratio",
                    options=ratio_options,
                    index=ratio_options.index(detected_ratio),
                    key="aspect_file",
                    help="Select the page orientation and dimensions"
                )
            st.markdown(f"""
**Selected: {aspect_ratio_file}**
- 16:9 = Landscape (297mm Γ 210mm)
- 1:1 = Square (210mm Γ 210mm)
- 9:16 = Portrait (210mm Γ 297mm)
""")
            convert_file_btn = st.button("π Convert to PDF", key="convert_file", type="primary", width="stretch")
        with col2:
            st.subheader("ποΈ HTML Preview")
            with st.expander("Show HTML Preview", expanded=False):
                st.components.v1.html(render_html_preview(html_content), height=600, scrolling=True)

        # Conversion section
        if convert_file_btn:
            try:
                with st.spinner("Converting HTML to PDF..."):
                    # Create temp directory
                    temp_dir = tempfile.mkdtemp()
                    try:
                        # Process images if uploaded
                        processed_html = html_content
                        if uploaded_images:
                            image_mapping = save_uploaded_images(uploaded_images, temp_dir)
                            processed_html = process_html_with_images(html_content, temp_dir, image_mapping)
                            st.info(f"π· Processed {len(uploaded_images)} image(s)")
                            # Debug info
                            with st.expander("π Debug: Image Mapping"):
                                for orig, new in image_mapping.items():
                                    st.text(f"{orig} -> {new}")
                                    full_path = os.path.join(temp_dir, new)
                                    st.text(f"Full path: {full_path}")
                                    st.text(f"Exists: {os.path.exists(full_path)}")
                        # Convert to PDF
                        pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_file, temp_dir)
                    finally:
                        # Cleanup runs on every exit path, including
                        # interruptions that `except Exception` does not catch.
                        shutil.rmtree(temp_dir, ignore_errors=True)

                if error:
                    st.error(f"β {error}")
                    with st.expander("Show error details"):
                        st.code(error)
                else:
                    st.success("β PDF generated successfully!")
                    col_a, col_b = st.columns([1, 1])
                    with col_a:
                        # Swap only the final extension, whatever its case
                        # ("a.html.b.HTML" -> "a.html.b.pdf").
                        output_filename = Path(uploaded_file.name).with_suffix('.pdf').name
                        st.download_button(
                            label="β¬οΈ Download PDF",
                            data=pdf_bytes,
                            file_name=output_filename,
                            mime="application/pdf",
                            width="stretch",
                            key="download_file_pdf"
                        )
                    with col_b:
                        st.info(f"π¦ Size: {len(pdf_bytes):,} bytes")
                    # PDF Preview
                    st.subheader("π PDF Preview")
                    st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
            except Exception as e:
                # Top-level UI boundary: show the failure instead of crashing the page.
                st.error(f"β Error: {str(e)}")
# Tab 2: Paste HTML Code
# Flow: edit HTML in a text area (plus optional images), pick or auto-detect
# the aspect ratio, convert on button press, then offer download and preview.
_SAMPLE_HTML = """<!DOCTYPE html>
<html>
<head>
<title>Sample Document</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 40px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
}
h1 {
font-size: 48px;
margin-bottom: 20px;
text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
}
p {
font-size: 18px;
line-height: 1.6;
}
.box {
background: rgba(255,255,255,0.1);
padding: 20px;
border-radius: 10px;
margin-top: 20px;
}
</style>
</head>
<body>
<h1>Hello, PDF World! π</h1>
<p>This is a sample HTML document converted to PDF.</p>
<div class="box">
<p>β¨ Styles, colors, and gradients are preserved!</p>
</div>
</body>
</html>"""

with tab2:
    col1, col2 = st.columns([1, 1])
    with col1:
        html_code = st.text_area(
            "HTML Content",
            value=_SAMPLE_HTML,
            height=400,
            key="html_code"
        )
        # Image uploader for text tab
        uploaded_images_text = st.file_uploader(
            "π· Upload Images (optional)",
            type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
            key="image_uploader_text",
            help="Upload images referenced in your HTML code",
            accept_multiple_files=True
        )
        if uploaded_images_text:
            st.success(f"β {len(uploaded_images_text)} image(s) uploaded")
            with st.expander("View uploaded images"):
                # At most four thumbnails per row; extra images wrap around.
                cols = st.columns(min(len(uploaded_images_text), 4))
                for idx, img in enumerate(uploaded_images_text):
                    with cols[idx % 4]:
                        st.image(img, caption=img.name, use_container_width=True)

        # Settings and the convert button only appear for non-blank input.
        has_html = bool(html_code and html_code.strip())
        if has_html:
            # Auto-detect aspect ratio
            detected_ratio_text = detect_aspect_ratio(html_code)
            ratio_options_text = ["16:9", "1:1", "9:16"]
            auto_detect_text = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_text")
            if auto_detect_text:
                aspect_ratio_text = detected_ratio_text
                st.info(f"π Detected: **{detected_ratio_text}**")
            else:
                # Manual choice, pre-selected to the detected value.
                aspect_ratio_text = st.radio(
                    "Aspect Ratio",
                    options=ratio_options_text,
                    index=ratio_options_text.index(detected_ratio_text),
                    key="aspect_text",
                    help="Select the page orientation and dimensions"
                )
            convert_text_btn = st.button("π Convert to PDF", key="convert_text", type="primary", width="stretch")
        else:
            convert_text_btn = False

    with col2:
        if has_html:
            st.subheader("ποΈ HTML Preview")
            with st.expander("Show HTML Preview", expanded=False):
                st.components.v1.html(render_html_preview(html_code), height=600, scrolling=True)

    # The button is only created when has_html is true, so testing the
    # button alone is sufficient here.
    if convert_text_btn:
        try:
            with st.spinner("Converting HTML to PDF..."):
                # Create temp directory
                temp_dir = tempfile.mkdtemp()
                try:
                    # Process images if uploaded
                    processed_html = html_code
                    if uploaded_images_text:
                        image_mapping = save_uploaded_images(uploaded_images_text, temp_dir)
                        processed_html = process_html_with_images(html_code, temp_dir, image_mapping)
                        st.info(f"π· Processed {len(uploaded_images_text)} image(s)")
                        # Debug info
                        with st.expander("π Debug: Image Mapping"):
                            for orig, new in image_mapping.items():
                                st.text(f"{orig} -> {new}")
                                full_path = os.path.join(temp_dir, new)
                                st.text(f"Full path: {full_path}")
                                st.text(f"Exists: {os.path.exists(full_path)}")
                    # Convert to PDF
                    pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_text, temp_dir)
                finally:
                    # Cleanup runs on every exit path, including
                    # interruptions that `except Exception` does not catch.
                    shutil.rmtree(temp_dir, ignore_errors=True)

            if error:
                st.error(f"β {error}")
                with st.expander("Show error details"):
                    st.code(error)
            else:
                st.success("β PDF generated successfully!")
                col_a, col_b = st.columns([1, 1])
                with col_a:
                    st.download_button(
                        label="β¬οΈ Download PDF",
                        data=pdf_bytes,
                        file_name="converted.pdf",
                        mime="application/pdf",
                        width="stretch",
                        key="download_text_pdf"
                    )
                with col_b:
                    st.info(f"π¦ Size: {len(pdf_bytes):,} bytes")
                # PDF Preview
                st.subheader("π PDF Preview")
                st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
        except Exception as e:
            # Top-level UI boundary: show the failure instead of crashing the page.
            st.error(f"β Error: {str(e)}")
# Footer: a horizontal rule followed by static usage tips.
_FOOTER_MARKDOWN = """
### π‘ Tips:
- **Auto-detection** analyzes your HTML to suggest the best aspect ratio
- **16:9** - Best for presentations and landscape documents (297mm Γ 210mm)
- **1:1** - Square format (210mm Γ 210mm)
- **9:16** - Portrait format, standard A4 (210mm Γ 297mm)
- **Image Support** - Upload JPG, PNG, GIF, SVG, WebP, or BMP images
- All CSS styles, colors, gradients, and fonts are preserved
- Use inline CSS or `<style>` tags for best results
- Reference images by filename in your HTML (e.g., `<img src="image.jpg">`)
- External resources should use absolute URLs
- **PDF Preview** renders directly in the browser using PDF.js
### πΌοΈ Using Images:
1. Upload your HTML file
2. Upload all images referenced in the HTML
3. Make sure image filenames in HTML match uploaded files exactly
4. The converter will automatically embed images in the PDF
"""
for _section in ("---", _FOOTER_MARKDOWN):
    st.markdown(_section)