Spaces:

ABDALLALSWAITI
/

htmlpdf

Sleeping

App Files Files Community

ABDALLALSWAITI committed on Oct 16, 2025

Commit

430bb94

verified ·

1 Parent(s): 442c38f

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +173 -71

src/streamlit_app.py CHANGED Viewed

@@ -65,6 +65,40 @@ def detect_aspect_ratio(html_content):
     # Default to A4 portrait for documents
     return "9:16"
 def render_html_preview(html_content):
     """Render HTML preview in an iframe"""
     # Encode HTML content
@@ -73,10 +107,9 @@ def render_html_preview(html_content):
     return iframe_html
 def render_pdf_preview(pdf_bytes):
-    """Render PDF preview using embedded PDF.js for better browser compatibility"""
     b64 = base64.b64encode(pdf_bytes).decode()
-    # Embed PDF.js directly to avoid I/O errors and CORS issues
     pdf_viewer_html = f'''
     <!DOCTYPE html>
     <html>
@@ -126,24 +159,20 @@ def render_pdf_preview(pdf_bytes):
         <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
         <script>
-            // Set worker source
             pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
             const pdfData = atob('{b64}');
             const pdfContainer = document.getElementById('pdf-container');
             const loading = document.getElementById('loading');
-            // Convert base64 to Uint8Array
             const uint8Array = new Uint8Array(pdfData.length);
             for (let i = 0; i < pdfData.length; i++) {{
                 uint8Array[i] = pdfData.charCodeAt(i);
             }}
-            // Load PDF
             pdfjsLib.getDocument({{data: uint8Array}}).promise.then(function(pdf) {{
                 loading.style.display = 'none';
-                // Render all pages
                 const numPages = pdf.numPages;
                 const promises = [];
@@ -179,22 +208,19 @@ def render_pdf_preview(pdf_bytes):
     '''
     return pdf_viewer_html
-def convert_html_to_pdf(html_content, aspect_ratio):
     """
     Convert HTML content to PDF using Puppeteer with better styling preservation
     Args:
         html_content: String containing HTML content
         aspect_ratio: One of "16:9", "1:1", or "9:16"
     Returns:
         Tuple of (pdf_bytes, error_message)
     """
-    temp_dir = None
     try:
-        # Create temporary directory for processing
-        temp_dir = tempfile.mkdtemp()
         # Inject CSS to preserve styles better
         style_injection = """
         <style>
@@ -252,25 +278,18 @@ def convert_html_to_pdf(html_content, aspect_ratio):
         with open(pdf_file, 'rb') as f:
             pdf_bytes = f.read()
-        # Clean up temporary directory
-        shutil.rmtree(temp_dir, ignore_errors=True)
         return pdf_bytes, None
     except subprocess.TimeoutExpired:
-        if temp_dir:
-            shutil.rmtree(temp_dir, ignore_errors=True)
         return None, "Error: PDF conversion timed out (60 seconds)"
     except Exception as e:
-        if temp_dir:
-            shutil.rmtree(temp_dir, ignore_errors=True)
         return None, f"Error: {str(e)}"
 # Page header
 st.title("📄 HTML to PDF Converter")
 st.markdown("""
 Convert HTML files or HTML code to PDF using Puppeteer with automatic aspect ratio detection.
-Preserves styles, fonts, colors, and layout.
 """)
 # Create tabs
@@ -286,6 +305,23 @@ with tab1:
         accept_multiple_files=False
     )
     if uploaded_file is not None:
         st.success(f"✅ File uploaded: {uploaded_file.name} ({uploaded_file.size:,} bytes)")
@@ -335,38 +371,58 @@ with tab1:
         # Conversion section
         if convert_file_btn:
-            with st.spinner("Converting HTML to PDF..."):
-                pdf_bytes, error = convert_html_to_pdf(html_content, aspect_ratio_file)
-                if error:
-                    st.error(f"❌ {error}")
-                    with st.expander("Show error details"):
-                        st.code(error)
-                else:
-                    st.success("✅ PDF generated successfully!")
-                    col_a, col_b = st.columns([1, 1])
-                    with col_a:
-                        output_filename = uploaded_file.name.replace('.html', '.pdf').replace('.htm', '.pdf')
-                        if not output_filename.endswith('.pdf'):
-                            output_filename += '.pdf'
-                        st.download_button(
-                            label="⬇️ Download PDF",
-                            data=pdf_bytes,
-                            file_name=output_filename,
-                            mime="application/pdf",
-                            use_container_width=True,
-                            key="download_file_pdf"
-                        )
-                    with col_b:
-                        st.info(f"📦 Size: {len(pdf_bytes):,} bytes")
-                    # PDF Preview
-                    st.subheader("📄 PDF Preview")
-                    st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
 # Tab 2: Paste HTML Code
 with tab2:
@@ -415,6 +471,23 @@ with tab2:
             key="html_code"
         )
         if html_code and html_code.strip():
             # Auto-detect aspect ratio
             detected_ratio_text = detect_aspect_ratio(html_code)
@@ -444,34 +517,55 @@ with tab2:
                 st.components.v1.html(render_html_preview(html_code), height=600, scrolling=True)
     if convert_text_btn and html_code and html_code.strip():
-        with st.spinner("Converting HTML to PDF..."):
-            pdf_bytes, error = convert_html_to_pdf(html_code, aspect_ratio_text)
-            if error:
-                st.error(f"❌ {error}")
-                with st.expander("Show error details"):
-                    st.code(error)
-            else:
-                st.success("✅ PDF generated successfully!")
-                col_a, col_b = st.columns([1, 1])
-                with col_a:
-                    st.download_button(
-                        label="⬇️ Download PDF",
-                        data=pdf_bytes,
-                        file_name="converted.pdf",
-                        mime="application/pdf",
-                        use_container_width=True,
-                        key="download_text_pdf"
-                    )
-                with col_b:
-                    st.info(f"📦 Size: {len(pdf_bytes):,} bytes")
-                # PDF Preview
-                st.subheader("📄 PDF Preview")
-                st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
 # Footer with tips
 st.markdown("---")
@@ -481,8 +575,16 @@ st.markdown("""
 - **16:9** - Best for presentations and landscape documents (297mm × 210mm)
 - **1:1** - Square format (210mm × 210mm)
 - **9:16** - Portrait format, standard A4 (210mm × 297mm)
 - All CSS styles, colors, gradients, and fonts are preserved
 - Use inline CSS or `<style>` tags for best results
 - External resources should use absolute URLs
 - **PDF Preview** renders directly in the browser using PDF.js
 """)

     # Default to A4 portrait for documents
     return "9:16"
+def save_uploaded_images(images, temp_dir):
+    """Save uploaded images and return mapping"""
+    image_mapping = {}
+    images_dir = os.path.join(temp_dir, "images")
+    os.makedirs(images_dir, exist_ok=True)
+    for image in images:
+        # Save image
+        image_path = os.path.join(images_dir, image.name)
+        with open(image_path, 'wb') as f:
+            f.write(image.getvalue())
+        # Create mapping
+        image_mapping[image.name] = f"images/{image.name}"
+    return image_mapping
+def process_html_with_images(html_content, image_mapping):
+    """Process HTML to handle image references"""
+    for original_name, new_path in image_mapping.items():
+        # Handle various image reference patterns
+        patterns = [
+            (f'src="{original_name}"', f'src="{new_path}"'),
+            (f"src='{original_name}'", f"src='{new_path}'"),
+            (f'href="{original_name}"', f'href="{new_path}"'),
+            (f"href='{original_name}'", f"href='{new_path}'"),
+        ]
+        for old_pattern, new_pattern in patterns:
+            if old_pattern in html_content:
+                html_content = html_content.replace(old_pattern, new_pattern)
+    return html_content
 def render_html_preview(html_content):
     """Render HTML preview in an iframe"""
     # Encode HTML content
     return iframe_html
 def render_pdf_preview(pdf_bytes):
+    """Render PDF preview using embedded PDF.js"""
     b64 = base64.b64encode(pdf_bytes).decode()
     pdf_viewer_html = f'''
     <!DOCTYPE html>
     <html>
         <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
         <script>
             pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
             const pdfData = atob('{b64}');
             const pdfContainer = document.getElementById('pdf-container');
             const loading = document.getElementById('loading');
             const uint8Array = new Uint8Array(pdfData.length);
             for (let i = 0; i < pdfData.length; i++) {{
                 uint8Array[i] = pdfData.charCodeAt(i);
             }}
             pdfjsLib.getDocument({{data: uint8Array}}).promise.then(function(pdf) {{
                 loading.style.display = 'none';
                 const numPages = pdf.numPages;
                 const promises = [];
     '''
     return pdf_viewer_html
+def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
     """
     Convert HTML content to PDF using Puppeteer with better styling preservation
     Args:
         html_content: String containing HTML content
         aspect_ratio: One of "16:9", "1:1", or "9:16"
+        temp_dir: Temporary directory for processing
     Returns:
         Tuple of (pdf_bytes, error_message)
     """
     try:
         # Inject CSS to preserve styles better
         style_injection = """
         <style>
         with open(pdf_file, 'rb') as f:
             pdf_bytes = f.read()
         return pdf_bytes, None
     except subprocess.TimeoutExpired:
         return None, "Error: PDF conversion timed out (60 seconds)"
     except Exception as e:
         return None, f"Error: {str(e)}"
 # Page header
 st.title("📄 HTML to PDF Converter")
 st.markdown("""
 Convert HTML files or HTML code to PDF using Puppeteer with automatic aspect ratio detection.
+✨ **NEW:** Upload images alongside your HTML files!
 """)
 # Create tabs
         accept_multiple_files=False
     )
+    # Image uploader
+    uploaded_images = st.file_uploader(
+        "📷 Upload Images (optional)",
+        type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
+        key="image_uploader",
+        help="Upload images referenced in your HTML",
+        accept_multiple_files=True
+    )
+    if uploaded_images:
+        st.success(f"✅ {len(uploaded_images)} image(s) uploaded")
+        with st.expander("View uploaded images"):
+            cols = st.columns(min(len(uploaded_images), 4))
+            for idx, img in enumerate(uploaded_images):
+                with cols[idx % 4]:
+                    st.image(img, caption=img.name, use_container_width=True)
     if uploaded_file is not None:
         st.success(f"✅ File uploaded: {uploaded_file.name} ({uploaded_file.size:,} bytes)")
         # Conversion section
         if convert_file_btn:
+            temp_dir = None
+            try:
+                with st.spinner("Converting HTML to PDF..."):
+                    # Create temp directory
+                    temp_dir = tempfile.mkdtemp()
+                    # Process images if uploaded
+                    if uploaded_images:
+                        image_mapping = save_uploaded_images(uploaded_images, temp_dir)
+                        html_content = process_html_with_images(html_content, image_mapping)
+                        st.info(f"📷 Processed {len(uploaded_images)} image(s)")
+                    # Convert to PDF
+                    pdf_bytes, error = convert_html_to_pdf(html_content, aspect_ratio_file, temp_dir)
+                    # Cleanup
+                    if temp_dir:
+                        shutil.rmtree(temp_dir, ignore_errors=True)
+                    if error:
+                        st.error(f"❌ {error}")
+                        with st.expander("Show error details"):
+                            st.code(error)
+                    else:
+                        st.success("✅ PDF generated successfully!")
+                        col_a, col_b = st.columns([1, 1])
+                        with col_a:
+                            output_filename = uploaded_file.name.replace('.html', '.pdf').replace('.htm', '.pdf')
+                            if not output_filename.endswith('.pdf'):
+                                output_filename += '.pdf'
+                            st.download_button(
+                                label="⬇️ Download PDF",
+                                data=pdf_bytes,
+                                file_name=output_filename,
+                                mime="application/pdf",
+                                use_container_width=True,
+                                key="download_file_pdf"
+                            )
+                        with col_b:
+                            st.info(f"📦 Size: {len(pdf_bytes):,} bytes")
+                        # PDF Preview
+                        st.subheader("📄 PDF Preview")
+                        st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
+            except Exception as e:
+                if temp_dir:
+                    shutil.rmtree(temp_dir, ignore_errors=True)
+                st.error(f"❌ Error: {str(e)}")
 # Tab 2: Paste HTML Code
 with tab2:
             key="html_code"
         )
+        # Image uploader for text tab
+        uploaded_images_text = st.file_uploader(
+            "📷 Upload Images (optional)",
+            type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
+            key="image_uploader_text",
+            help="Upload images referenced in your HTML code",
+            accept_multiple_files=True
+        )
+        if uploaded_images_text:
+            st.success(f"✅ {len(uploaded_images_text)} image(s) uploaded")
+            with st.expander("View uploaded images"):
+                cols = st.columns(min(len(uploaded_images_text), 4))
+                for idx, img in enumerate(uploaded_images_text):
+                    with cols[idx % 4]:
+                        st.image(img, caption=img.name, use_container_width=True)
         if html_code and html_code.strip():
             # Auto-detect aspect ratio
             detected_ratio_text = detect_aspect_ratio(html_code)
                 st.components.v1.html(render_html_preview(html_code), height=600, scrolling=True)
     if convert_text_btn and html_code and html_code.strip():
+        temp_dir = None
+        try:
+            with st.spinner("Converting HTML to PDF..."):
+                # Create temp directory
+                temp_dir = tempfile.mkdtemp()
+                # Process images if uploaded
+                processed_html = html_code
+                if uploaded_images_text:
+                    image_mapping = save_uploaded_images(uploaded_images_text, temp_dir)
+                    processed_html = process_html_with_images(html_code, image_mapping)
+                    st.info(f"📷 Processed {len(uploaded_images_text)} image(s)")
+                # Convert to PDF
+                pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_text, temp_dir)
+                # Cleanup
+                if temp_dir:
+                    shutil.rmtree(temp_dir, ignore_errors=True)
+                if error:
+                    st.error(f"❌ {error}")
+                    with st.expander("Show error details"):
+                        st.code(error)
+                else:
+                    st.success("✅ PDF generated successfully!")
+                    col_a, col_b = st.columns([1, 1])
+                    with col_a:
+                        st.download_button(
+                            label="⬇️ Download PDF",
+                            data=pdf_bytes,
+                            file_name="converted.pdf",
+                            mime="application/pdf",
+                            use_container_width=True,
+                            key="download_text_pdf"
+                        )
+                    with col_b:
+                        st.info(f"📦 Size: {len(pdf_bytes):,} bytes")
+                    # PDF Preview
+                    st.subheader("📄 PDF Preview")
+                    st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
+        except Exception as e:
+            if temp_dir:
+                shutil.rmtree(temp_dir, ignore_errors=True)
+            st.error(f"❌ Error: {str(e)}")
 # Footer with tips
 st.markdown("---")
 - **16:9** - Best for presentations and landscape documents (297mm × 210mm)
 - **1:1** - Square format (210mm × 210mm)
 - **9:16** - Portrait format, standard A4 (210mm × 297mm)
+- **Image Support** - Upload JPG, PNG, GIF, SVG, WebP, or BMP images
 - All CSS styles, colors, gradients, and fonts are preserved
 - Use inline CSS or `<style>` tags for best results
+- Reference images by filename in your HTML (e.g., `<img src="image.jpg">`)
 - External resources should use absolute URLs
 - **PDF Preview** renders directly in the browser using PDF.js
+### 🖼️ Using Images:
+1. Upload your HTML file
+2. Upload all images referenced in the HTML
+3. Make sure image filenames in HTML match uploaded files exactly
+4. The converter will automatically embed images in the PDF
 """)