import streamlit as st
import subprocess
import os
import tempfile
import shutil
from pathlib import Path
import base64
import re
# Page-level Streamlit settings: browser tab title and icon, and the "wide"
# layout option for the app.
st.set_page_config(
    page_title="HTML to PDF Converter",
    page_icon="📄",
    layout="wide"
)
def detect_aspect_ratio(html_content):
    """
    Guess which page aspect ratio best suits an HTML document.

    Heuristics are tried in order; the first one that yields an answer wins:

    1. A viewport ``<meta>`` tag whose content has ``width=device-width`` or
       ``width=100%`` plus an ``orientation=portrait`` / ``landscape`` hint.
    2. The first CSS ``aspect-ratio: W / H`` declaration found in the text.
    3. Presentation keywords (``reveal.js``, ``impress.js``, ``slide``,
       ``presentation``) anywhere in the text.
    4. Numeric ``width`` / ``height`` values in the ``<body>`` inline style.

    Args:
        html_content: The HTML document as a string.

    Returns: "16:9", "1:1", or "9:16" ("9:16" when no heuristic matches)
    """
    # Check for viewport meta tag
    viewport_match = re.search(
        r'<meta[^>]*viewport[^>]*content=["\']([^"\']*)["\']',
        html_content,
        re.IGNORECASE,
    )
    if viewport_match:
        viewport = viewport_match.group(1).lower()
        if 'width=device-width' in viewport or 'width=100%' in viewport:
            # Check for orientation hints
            if 'orientation=portrait' in viewport:
                return "9:16"
            if 'orientation=landscape' in viewport:
                return "16:9"

    # Check for CSS aspect-ratio property
    aspect_match = re.search(
        r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)',
        html_content,
        re.IGNORECASE,
    )
    if aspect_match:
        width = int(aspect_match.group(1))
        height = int(aspect_match.group(2))
        # A zero height carries no usable hint (and would divide by zero),
        # so fall through to the remaining heuristics instead.
        if height:
            ratio = width / height
            if ratio > 1.5:
                return "16:9"
            if ratio < 0.7:
                return "9:16"
            return "1:1"

    # Check for common presentation frameworks
    lowered = html_content.lower()
    if any(keyword in lowered for keyword in ('reveal.js', 'impress.js', 'slide', 'presentation')):
        return "16:9"

    # Check body style for width/height hints
    body_match = re.search(
        r'<body[^>]*style=["\']([^"\']*)["\']',
        html_content,
        re.IGNORECASE,
    )
    if body_match:
        style = body_match.group(1).lower()
        width_match = re.search(r'width\s*:\s*(\d+)', style)
        height_match = re.search(r'height\s*:\s*(\d+)', style)
        if width_match and height_match:
            w = int(width_match.group(1))
            h = int(height_match.group(1))
            if h:
                ratio = w / h
                if ratio > 1.5:
                    return "16:9"
                if ratio < 0.7:
                    return "9:16"
                # Near-square body sizes deliberately fall through to the
                # default below rather than mapping to "1:1".

    # Default to A4 portrait for documents
    return "9:16"
def save_uploaded_images(images, temp_dir):
    """
    Save uploaded images under ``<temp_dir>/images`` and return a name mapping.

    Args:
        images: Iterable of uploaded-file objects exposing a ``name``
            attribute and a ``getvalue()`` method returning the file bytes.
        temp_dir: Directory in which the ``images`` sub-directory is created.

    Returns:
        Dict mapping each upload's original ``name`` to the relative path
        ``"images/<file name>"`` it was written to. Uploads whose name has no
        usable file component are skipped and do not appear in the mapping.
    """
    image_mapping = {}
    images_dir = os.path.join(temp_dir, "images")
    os.makedirs(images_dir, exist_ok=True)
    for image in images:
        # The name is supplied by the client; keep only its final path
        # component so something like "../x.png" cannot be written outside
        # images_dir.
        safe_name = os.path.basename(image.name)
        if safe_name in ("", ".", ".."):
            continue
        # Save image
        image_path = os.path.join(images_dir, safe_name)
        with open(image_path, 'wb') as f:
            f.write(image.getvalue())
        # Create mapping (keyed by the original name, which is what the HTML
        # references)
        image_mapping[image.name] = f"images/{safe_name}"
        print(f"Saved image: {image.name} -> {image_path}")
    return image_mapping
def process_html_with_images(html_content, temp_dir, image_mapping):
    """
    Rewrite image references in HTML to absolute ``file://`` URLs.

    For every entry in ``image_mapping`` the following references to the
    original file name (optionally prefixed with ``./``, matched
    case-insensitively) are replaced:

    * ``src="name"`` / ``src='name'``
    * ``url(name)`` with or without quotes
    * ``href="name"`` / ``href='name'``

    Args:
        html_content: HTML text to process.
        temp_dir: Directory that the mapping's relative paths are resolved
            against.
        image_mapping: Dict of original file name -> path relative to
            ``temp_dir``.

    Returns:
        The HTML text with matching references pointing at the saved files.
    """
    for original_name, relative_path in image_mapping.items():
        # Get absolute path for the image
        absolute_path = os.path.abspath(os.path.join(temp_dir, relative_path))
        # as_uri() percent-encodes characters such as spaces and produces a
        # well-formed URL on every platform, unlike "file://" + path.
        file_url = Path(absolute_path).as_uri()
        name_pattern = rf'(?:\./)?{re.escape(original_name)}'
        replacements = (
            # Pattern 1: src="filename" or src='filename'
            (rf'src=["\']{name_pattern}["\']', f'src="{file_url}"'),
            # Pattern 2: background-image: url(filename). Emitted unquoted so
            # it cannot terminate a surrounding style="..." attribute; the
            # percent-encoded URL has no characters that need CSS quoting.
            (rf'url\(["\']?{name_pattern}["\']?\)', f'url({file_url})'),
            # Pattern 3: href for links
            (rf'href=["\']{name_pattern}["\']', f'href="{file_url}"'),
        )
        for pattern, replacement in replacements:
            # A callable replacement inserts the text literally; a plain
            # string would be parsed for backslash escapes and group refs.
            html_content = re.sub(
                pattern,
                lambda _match, text=replacement: text,
                html_content,
                flags=re.IGNORECASE,
            )
    return html_content
def render_html_preview(html_content):
    """
    Render HTML preview in an iframe.

    The document is embedded as a base64 ``data:`` URL so that it is shown in
    its own browsing context, isolated from the surrounding page's styles.

    Args:
        html_content: HTML text to preview.

    Returns:
        HTML markup for an ``<iframe>`` element that displays the document.
    """
    # Encode HTML content; the charset is declared in the data URL so that
    # non-ASCII characters (e.g. emoji) are decoded correctly.
    b64 = base64.b64encode(html_content.encode("utf-8")).decode("ascii")
    iframe_html = (
        f'<iframe src="data:text/html;charset=utf-8;base64,{b64}" '
        'width="100%" height="600" '
        'style="border: 1px solid #ddd; border-radius: 4px;"></iframe>'
    )
    return iframe_html
def render_pdf_preview(pdf_bytes):
    """Render PDF preview using embedded PDF.js

    Args:
        pdf_bytes: Raw PDF data; it is passed to ``base64.b64encode``.

    Returns:
        The viewer markup string built from the template below.
    """
    # Base64 text form of the PDF, presumably meant to be embedded in the
    # viewer markup.
    b64 = base64.b64encode(pdf_bytes).decode()
    # NOTE(review): the template below contains no markup and never references
    # b64, so this function currently returns just a newline. The PDF.js viewer
    # HTML mentioned in the docstring appears to be missing - confirm and
    # restore it.
    pdf_viewer_html = f'''
'''
    return pdf_viewer_html
def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
"""
Convert HTML content to PDF using Puppeteer with better styling preservation
Args:
html_content: String containing HTML content
aspect_ratio: One of "16:9", "1:1", or "9:16"
temp_dir: Temporary directory for processing
Returns:
Tuple of (pdf_bytes, error_message)
"""
try:
# Inject CSS to preserve styles better
style_injection = """
"""
# Insert style injection before closing head tag or at the start of body
if '' in html_content:
html_content = html_content.replace('', style_injection + '')
elif ' {new}")
full_path = os.path.join(temp_dir, new)
st.text(f"Full path: {full_path}")
st.text(f"Exists: {os.path.exists(full_path)}")
# Convert to PDF
pdf_bytes, error = convert_html_to_pdf(html_content, aspect_ratio_file, temp_dir)
# Cleanup
if temp_dir:
shutil.rmtree(temp_dir, ignore_errors=True)
if error:
st.error(f"❌ {error}")
with st.expander("Show error details"):
st.code(error)
else:
st.success("✅ PDF generated successfully!")
col_a, col_b = st.columns([1, 1])
with col_a:
output_filename = uploaded_file.name.replace('.html', '.pdf').replace('.htm', '.pdf')
if not output_filename.endswith('.pdf'):
output_filename += '.pdf'
st.download_button(
label="⬇️ Download PDF",
data=pdf_bytes,
file_name=output_filename,
mime="application/pdf",
width="stretch",
key="download_file_pdf"
)
with col_b:
st.info(f"📦 Size: {len(pdf_bytes):,} bytes")
# PDF Preview
st.subheader("📄 PDF Preview")
st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
except Exception as e:
if temp_dir:
shutil.rmtree(temp_dir, ignore_errors=True)
st.error(f"❌ Error: {str(e)}")
# Tab 2: Paste HTML Code
# Lets the user paste raw HTML (plus optional images), choose or auto-detect
# an aspect ratio, preview the HTML, and convert it to a downloadable PDF.
# NOTE(review): block indentation is missing in this copy of the file, so the
# nesting of the statements below cannot be read off the text - restore it
# from the original before relying on this section.
with tab2:
col1, col2 = st.columns([1, 1])
with col1:
html_code = st.text_area(
"HTML Content",
value="""
Sample Document
Hello, PDF World! 🌍
This is a sample HTML document converted to PDF.
✨ Styles, colors, and gradients are preserved!
""",
height=400,
key="html_code"
)
# Image uploader for text tab
uploaded_images_text = st.file_uploader(
"📷 Upload Images (optional)",
type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
key="image_uploader_text",
help="Upload images referenced in your HTML code",
accept_multiple_files=True
)
if uploaded_images_text:
st.success(f"✅ {len(uploaded_images_text)} image(s) uploaded")
with st.expander("View uploaded images"):
# At most four thumbnail columns; idx % 4 wraps further images back to the
# first column.
cols = st.columns(min(len(uploaded_images_text), 4))
for idx, img in enumerate(uploaded_images_text):
with cols[idx % 4]:
st.image(img, caption=img.name, use_container_width=True)
if html_code and html_code.strip():
# Auto-detect aspect ratio
detected_ratio_text = detect_aspect_ratio(html_code)
auto_detect_text = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_text")
if auto_detect_text:
aspect_ratio_text = detected_ratio_text
st.info(f"🔍 Detected: **{detected_ratio_text}**")
else:
aspect_ratio_text = st.radio(
"Aspect Ratio",
options=["16:9", "1:1", "9:16"],
# Preselect the detected ratio as the starting choice for manual selection.
index=["16:9", "1:1", "9:16"].index(detected_ratio_text),
key="aspect_text",
help="Select the page orientation and dimensions"
)
convert_text_btn = st.button("🔄 Convert to PDF", key="convert_text", type="primary", width="stretch")
else:
# No HTML entered: the button is not drawn, so treat it as not clicked.
convert_text_btn = False
with col2:
if html_code and html_code.strip():
st.subheader("👁️ HTML Preview")
with st.expander("Show HTML Preview", expanded=False):
st.components.v1.html(render_html_preview(html_code), height=600, scrolling=True)
if convert_text_btn and html_code and html_code.strip():
# Keep a handle on the temp directory so the except handler can remove it
# if something fails after it has been created.
temp_dir = None
try:
with st.spinner("Converting HTML to PDF..."):
# Create temp directory
temp_dir = tempfile.mkdtemp()
# Process images if uploaded
processed_html = html_code
if uploaded_images_text:
image_mapping = save_uploaded_images(uploaded_images_text, temp_dir)
processed_html = process_html_with_images(html_code, temp_dir, image_mapping)
st.info(f"📷 Processed {len(uploaded_images_text)} image(s)")
# Debug info
with st.expander("🔍 Debug: Image Mapping"):
for orig, new in image_mapping.items():
st.text(f"{orig} -> {new}")
full_path = os.path.join(temp_dir, new)
st.text(f"Full path: {full_path}")
st.text(f"Exists: {os.path.exists(full_path)}")
# Convert to PDF
pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_text, temp_dir)
# Cleanup
# The conversion result is already held in pdf_bytes / error at this point,
# so the working directory is removed before anything is rendered.
if temp_dir:
shutil.rmtree(temp_dir, ignore_errors=True)
if error:
st.error(f"❌ {error}")
with st.expander("Show error details"):
st.code(error)
else:
st.success("✅ PDF generated successfully!")
col_a, col_b = st.columns([1, 1])
with col_a:
st.download_button(
label="⬇️ Download PDF",
data=pdf_bytes,
file_name="converted.pdf",
mime="application/pdf",
width="stretch",
key="download_text_pdf"
)
with col_b:
st.info(f"📦 Size: {len(pdf_bytes):,} bytes")
# PDF Preview
st.subheader("📄 PDF Preview")
st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
except Exception as e:
# Any failure: remove the temp directory if it was created, then show the
# exception message to the user.
if temp_dir:
shutil.rmtree(temp_dir, ignore_errors=True)
st.error(f"❌ Error: {str(e)}")
# Footer with tips
st.markdown("---")
st.markdown("""
### 💡 Tips:
- **Auto-detection** analyzes your HTML to suggest the best aspect ratio
- **16:9** - Best for presentations and landscape documents (297mm × 210mm)
- **1:1** - Square format (210mm × 210mm)
- **9:16** - Portrait format, standard A4 (210mm × 297mm)
- **Image Support** - Upload JPG, PNG, GIF, SVG, WebP, or BMP images
- All CSS styles, colors, gradients, and fonts are preserved
- Use inline CSS or `