Spaces:

ABDALLALSWAITI
/

htmlpdf

Sleeping

App Files Files Community

ABDALLALSWAITI committed on Oct 16, 2025

Commit

e126d9e

verified ·

1 Parent(s): 5e552f0

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +226 -515

src/streamlit_app.py CHANGED Viewed

@@ -1,22 +1,28 @@
 """
-Streamlit HTML to PDF Converter with Image Support and Proper Page Breaks
-Save this file as: src/streamlit_app.py
 """
 import streamlit as st
 import subprocess
 import os
 import tempfile
 import shutil
-from pathlib import Path
 import base64
 import re
 import mimetypes
-st.set_page_config(
-    page_title="HTML to PDF Converter",
-    page_icon="📄",
-    layout="wide"
-)
 def detect_aspect_ratio(html_content):
     """Detect aspect ratio from HTML content"""
@@ -48,10 +54,17 @@ def detect_aspect_ratio(html_content):
 def image_to_base64(image_file):
     """Convert uploaded image to base64 data URL"""
     try:
-        image_bytes = image_file.getvalue()
-        mime_type, _ = mimetypes.guess_type(image_file.name)
         if not mime_type:
-            ext = os.path.splitext(image_file.name)[1].lower()
             mime_map = {
                 '.jpg': 'image/jpeg',
                 '.jpeg': 'image/jpeg',
@@ -67,7 +80,7 @@ def image_to_base64(image_file):
         data_url = f"data:{mime_type};base64,{b64_data}"
         return data_url
     except Exception as e:
-        st.error(f"Error converting {image_file.name} to base64: {str(e)}")
         return None
 def embed_images_as_base64(html_content, uploaded_images):
@@ -79,8 +92,8 @@ def embed_images_as_base64(html_content, uploaded_images):
     for img in uploaded_images:
         data_url = image_to_base64(img)
         if data_url:
-            image_data_urls[img.name] = data_url
-            st.write(f"✓ Converted {img.name} to base64 ({len(data_url)} chars)")
     if not image_data_urls:
         return html_content, {}
@@ -90,155 +103,42 @@ def embed_images_as_base64(html_content, uploaded_images):
     for filename, data_url in image_data_urls.items():
         escaped_name = re.escape(filename)
-        # Pattern 1: img src attribute
         pattern1 = rf'(<img[^>]*\s+src\s*=\s*)(["\'])(?:[^"\']*?/)?{escaped_name}\2'
         matches1 = list(re.finditer(pattern1, html_content, flags=re.IGNORECASE | re.DOTALL))
-        count1 = len(matches1)
         if matches1:
             html_content = re.sub(pattern1, rf'\1\2{data_url}\2', html_content, flags=re.IGNORECASE | re.DOTALL)
-            replacements[f"{filename} (img src)"] = count1
-        # Pattern 2: background-image
         pattern2 = rf'(background-image\s*:\s*url\s*\()(["\']?)(?:[^)"\']*/)?{escaped_name}\2(\))'
         matches2 = list(re.finditer(pattern2, html_content, flags=re.IGNORECASE))
-        count2 = len(matches2)
         if matches2:
             html_content = re.sub(pattern2, rf'\1"{data_url}"\3', html_content, flags=re.IGNORECASE)
-            replacements[f"{filename} (bg-image)"] = count2
-        # Pattern 3: CSS url()
         pattern3 = rf'(url\s*\()(["\']?)(?:[^)"\']*/)?{escaped_name}\2(\))'
         matches3 = list(re.finditer(pattern3, html_content, flags=re.IGNORECASE))
-        count3 = len(matches3)
         if matches3:
             html_content = re.sub(pattern3, rf'\1"{data_url}"\3', html_content, flags=re.IGNORECASE)
-            replacements[f"{filename} (url)"] = count3
-    if replacements:
-        st.success("✅ Image Replacements:")
-        for key, count in replacements.items():
-            st.write(f"  • {key}: {count} replacement(s)")
-    else:
-        st.warning("⚠️ No image references found in HTML matching uploaded files!")
-        st.write("Uploaded files:", [img.name for img in uploaded_images])
-        with st.expander("🔍 Debug: Show HTML image references"):
-            img_lines = [line for line in html_content.split('\n')
-                        if any(k in line.lower() for k in ['<img', 'src=', 'url(', 'background'])]
-            if img_lines:
-                for line in img_lines[:10]:
-                    st.code(line.strip(), language='html')
-            else:
-                st.write("No image-related lines found in HTML")
     return html_content, replacements
 def inject_page_breaks(html_content: str, aspect_ratio: str):
-    """Automatically inject page breaks and page sizing CSS"""
-    # Determine page orientation
-    if aspect_ratio == "16:9":
-        page_size = "A4 landscape"
-        orientation = "landscape"
-    elif aspect_ratio == "1:1":
-        page_size = "210mm 210mm"
-        orientation = "portrait"
-    else:  # 9:16
-        page_size = "A4 portrait"
-        orientation = "portrait"
-    # Comprehensive page break CSS
     page_css = f"""
     <style id="auto-page-breaks">
-        /* Define page size */
-        @page {{
-            size: {page_size};
-            margin: 0;
-        }}
-        /* Reset body */
-        html, body {{
-            margin: 0 !important;
-            padding: 0 !important;
-            width: 100% !important;
-            height: 100% !important;
-        }}
-        /* Page containers - each should be one page */
-        .page, .slide, section.page, article.page, div[class*="page"], div[class*="slide"] {{
-            width: 100% !important;
-            min-height: 100vh !important;
-            height: 100vh !important;
-            page-break-after: always !important;
-            break-after: page !important;
-            page-break-inside: avoid !important;
-            break-inside: avoid !important;
-            position: relative !important;
-            box-sizing: border-box !important;
-            overflow: hidden !important;
-        }}
-        /* Last page shouldn't force a break */
-        .page:last-child, .slide:last-child,
-        section.page:last-child, article.page:last-child {{
-            page-break-after: auto !important;
-            break-after: auto !important;
-        }}
-        /* If no explicit page class, treat direct body children as pages */
-        body > section:not(.no-page-break),
-        body > article:not(.no-page-break),
-        body > div:not(.no-page-break) {{
-            page-break-after: always !important;
-            break-after: page !important;
-            min-height: 100vh;
-        }}
-        body > section:last-child,
-        body > article:last-child,
-        body > div:last-child {{
-            page-break-after: auto !important;
-        }}
-        /* Utility classes for manual control */
-        .page-break, .page-break-after {{
-            page-break-after: always !important;
-            break-after: page !important;
-        }}
-        .page-break-before {{
-            page-break-before: always !important;
-            break-before: page !important;
-        }}
-        .no-page-break, .keep-together {{
-            page-break-inside: avoid !important;
-            break-inside: avoid !important;
-        }}
-        /* Prevent awkward breaks in content */
-        h1, h2, h3, h4, h5, h6 {{
-            page-break-after: avoid !important;
-            break-after: avoid !important;
-            page-break-inside: avoid !important;
-            break-inside: avoid !important;
-        }}
-        img, figure, table, pre, blockquote {{
-            page-break-inside: avoid !important;
-            break-inside: avoid !important;
-        }}
-        /* Preserve colors and backgrounds */
-        * {{
-            -webkit-print-color-adjust: exact !important;
-            print-color-adjust: exact !important;
-            color-adjust: exact !important;
-        }}
     </style>
     """
-    # Inject CSS into HTML
     if '</head>' in html_content:
         html_content = html_content.replace('</head>', page_css + '</head>')
     elif '<body' in html_content:
@@ -248,142 +148,32 @@ def inject_page_breaks(html_content: str, aspect_ratio: str):
     return html_content
-def render_html_preview(html_content):
-    """Render HTML preview in an iframe"""
-    b64 = base64.b64encode(html_content.encode()).decode()
-    iframe_html = f'<iframe src="data:text/html;base64,{b64}" width="100%" height="600" style="border: 2px solid #ddd; border-radius: 5px;"></iframe>'
-    return iframe_html
-def render_pdf_preview(pdf_bytes):
-    """Render PDF preview using embedded PDF.js"""
-    b64 = base64.b64encode(pdf_bytes).decode()
-    pdf_viewer_html = f'''
-    <!DOCTYPE html>
-    <html>
-    <head>
-        <style>
-            body {{
-                margin: 0;
-                padding: 0;
-                overflow: hidden;
-                background: #525659;
-            }}
-            #pdf-container {{
-                width: 100%;
-                height: 100vh;
-                overflow: auto;
-                display: flex;
-                flex-direction: column;
-                align-items: center;
-                padding: 20px;
-                box-sizing: border-box;
-            }}
-            canvas {{
-                box-shadow: 0 2px 8px rgba(0,0,0,0.3);
-                margin-bottom: 10px;
-                background: white;
-            }}
-            #loading {{
-                color: white;
-                font-family: Arial, sans-serif;
-                font-size: 18px;
-                padding: 20px;
-            }}
-        </style>
-    </head>
-    <body>
-        <div id="pdf-container">
-            <div id="loading">Loading PDF...</div>
-        </div>
-        <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
-        <script>
-            pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
-            const pdfData = atob('{b64}');
-            const pdfContainer = document.getElementById('pdf-container');
-            const loading = document.getElementById('loading');
-            const uint8Array = new Uint8Array(pdfData.length);
-            for (let i = 0; i < pdfData.length; i++) {{
-                uint8Array[i] = pdfData.charCodeAt(i);
-            }}
-            pdfjsLib.getDocument({{data: uint8Array}}).promise.then(function(pdf) {{
-                loading.style.display = 'none';
-                const numPages = pdf.numPages;
-                const promises = [];
-                for (let pageNum = 1; pageNum <= numPages; pageNum++) {{
-                    promises.push(
-                        pdf.getPage(pageNum).then(function(page) {{
-                            const scale = 1.5;
-                            const viewport = page.getViewport({{scale: scale}});
-                            const canvas = document.createElement('canvas');
-                            const context = canvas.getContext('2d');
-                            canvas.height = viewport.height;
-                            canvas.width = viewport.width;
-                            pdfContainer.appendChild(canvas);
-                            return page.render({{
-                                canvasContext: context,
-                                viewport: viewport
-                            }}).promise;
-                        }})
-                    );
-                }}
-                return Promise.all(promises);
-            }}).catch(function(error) {{
-                loading.innerHTML = '<div style="color:#ff6b6b;">Error: ' + error.message + '</div>';
-            }});
-        </script>
-    </body>
-    </html>
-    '''
-    return pdf_viewer_html
 def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
-    """Convert HTML content to PDF using Puppeteer with proper page breaks"""
     try:
-        # Step 1: Inject page break CSS
-        st.write("🔧 Injecting page break CSS...")
         html_content = inject_page_breaks(html_content, aspect_ratio)
-        # Save HTML to temp file
         html_file = os.path.join(temp_dir, "input.html")
         with open(html_file, 'w', encoding='utf-8') as f:
             f.write(html_content)
-        st.write(f"📝 Saved HTML: {os.path.getsize(html_file):,} bytes")
-        # Find puppeteer script
         script_dir = os.path.dirname(os.path.abspath(__file__))
-        possible_paths = [
-            os.path.join(os.path.dirname(script_dir), 'puppeteer_pdf.js'),
-            os.path.join(script_dir, 'puppeteer_pdf.js'),
-            os.path.join(script_dir, '..', 'puppeteer_pdf.js'),
-            'puppeteer_pdf.js'
-        ]
-        puppeteer_script = None
-        for path in possible_paths:
-            if os.path.exists(path):
-                puppeteer_script = path
-                break
-        if not puppeteer_script:
-            return None, "Error: puppeteer_pdf.js not found"
-        st.write(f"🔧 Using Puppeteer: {puppeteer_script}")
-        # Run conversion
         result = subprocess.run(
             ['node', puppeteer_script, html_file, aspect_ratio],
             capture_output=True,
             text=True,
             timeout=60,
-            cwd=os.path.dirname(os.path.abspath(puppeteer_script))
         )
         if result.returncode != 0:
             return None, f"PDF conversion failed: {result.stderr}"
-        # Read PDF
         pdf_file = html_file.replace('.html', '.pdf')
         if not os.path.exists(pdf_file):
             return None, "PDF file was not generated"
@@ -391,305 +181,226 @@ def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
         with open(pdf_file, 'rb') as f:
             pdf_bytes = f.read()
-        st.write(f"✅ PDF generated: {len(pdf_bytes):,} bytes")
         return pdf_bytes, None
     except subprocess.TimeoutExpired:
-        return None, "Error: PDF conversion timed out (60 seconds)"
     except Exception as e:
         return None, f"Error: {str(e)}"
-# Main UI
 st.title("📄 HTML to PDF Converter")
-st.markdown("""
-Convert HTML to PDF with **proper page breaks** and **embedded base64 images**!
-✨ Each page in your HTML will be preserved as a separate PDF page.
 """)
-# Create tabs
-tab1, tab2 = st.tabs(["📤 Upload HTML File", "📝 Paste HTML Code"])
-# Tab 1: Upload HTML File
 with tab1:
-    uploaded_file = st.file_uploader(
-        "Choose an HTML file",
-        type=['html', 'htm'],
-        key="file_uploader",
-        help="Upload an HTML file"
-    )
-    uploaded_images = st.file_uploader(
-        "📷 Upload Images",
-        type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
-        key="image_uploader",
-        help="Upload images - they will be embedded as base64 in the HTML",
-        accept_multiple_files=True
-    )
-    if uploaded_images:
-        st.success(f"✅ {len(uploaded_images)} image(s) uploaded")
-        with st.expander("View uploaded images"):
-            cols = st.columns(min(len(uploaded_images), 4))
-            for idx, img in enumerate(uploaded_images):
-                with cols[idx % 4]:
-                    st.image(img, caption=img.name, use_container_width=True)
     if uploaded_file:
-        st.success(f"✅ File: {uploaded_file.name}")
-        uploaded_file.seek(0)
-        try:
-            html_content = uploaded_file.getvalue().decode('utf-8')
-        except UnicodeDecodeError:
-            uploaded_file.seek(0)
-            html_content = uploaded_file.getvalue().decode('latin-1')
         detected_ratio = detect_aspect_ratio(html_content)
-        col1, col2 = st.columns([1, 1])
-        with col1:
-            st.subheader("⚙️ Settings")
-            auto_detect = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_file")
-            if auto_detect:
-                aspect_ratio = detected_ratio
-                st.info(f"🔍 Detected: **{detected_ratio}**")
-            else:
-                aspect_ratio = st.radio(
-                    "Aspect Ratio",
-                    options=["16:9", "1:1", "9:16"],
-                    index=["16:9", "1:1", "9:16"].index(detected_ratio),
-                    key="aspect_file"
-                )
-            convert_btn = st.button("🔄 Convert to PDF", key="conv_file", type="primary", use_container_width=True)
-        with col2:
-            st.subheader("👁️ Preview")
-            with st.expander("Show HTML"):
-                st.components.v1.html(render_html_preview(html_content), height=400, scrolling=True)
-        if convert_btn:
-            temp_dir = None
             try:
                 with st.spinner("Converting..."):
-                    temp_dir = tempfile.mkdtemp()
-                    # Embed images as base64
                     processed_html = html_content
                     if uploaded_images:
-                        with st.expander("🖼️ Image Processing", expanded=True):
-                            processed_html, replacements = embed_images_as_base64(html_content, uploaded_images)
-                            if not replacements:
-                                st.warning("⚠️ Images uploaded but no matches found in HTML!")
-                                st.write("**Tip:** Make sure image filenames in HTML match uploaded files exactly")
-                    # Convert to PDF
                     pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio, temp_dir)
                     if error:
                         st.error(f"❌ {error}")
                     else:
-                        st.success("✅ PDF generated with proper page breaks!")
-                        output_name = uploaded_file.name.replace('.html', '.pdf').replace('.htm', '.pdf')
-                        if not output_name.endswith('.pdf'):
-                            output_name += '.pdf'
-                        col_a, col_b = st.columns(2)
-                        with col_a:
-                            st.download_button(
-                                "⬇️ Download PDF",
-                                data=pdf_bytes,
-                                file_name=output_name,
-                                mime="application/pdf",
-                                use_container_width=True
-                            )
-                        with col_b:
-                            st.info(f"Size: {len(pdf_bytes):,} bytes")
-                        st.subheader("📄 PDF Preview")
-                        st.components.v1.html(render_pdf_preview(pdf_bytes), height=600, scrolling=True)
-            except Exception as e:
-                st.error(f"❌ Error: {str(e)}")
             finally:
-                if temp_dir and os.path.exists(temp_dir):
                     shutil.rmtree(temp_dir, ignore_errors=True)
-# Tab 2: Paste HTML
 with tab2:
-    html_code = st.text_area(
-        "HTML Content",
-        value="""<!DOCTYPE html>
 <html>
-<head>
-    <style>
-        body {
-            font-family: Arial;
-            margin: 0;
-            padding: 0;
-        }
-        .page {
-            width: 100%;
-            height: 100vh;
-            display: flex;
-            align-items: center;
-            justify-content: center;
-            box-sizing: border-box;
-            padding: 40px;
-        }
-        .page:nth-child(1) {
-            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-            color: white;
-        }
-        .page:nth-child(2) {
-            background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
-            color: white;
-        }
-        .page:nth-child(3) {
-            background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
-            color: white;
-        }
-        h1 { font-size: 48px; text-shadow: 2px 2px 4px rgba(0,0,0,0.3); }
-    </style>
-</head>
 <body>
-    <div class="page">
-        <h1>Page 1: Hello PDF! 🌍</h1>
-    </div>
-    <div class="page">
-        <h1>Page 2: Separate Page! 📄</h1>
     </div>
-    <div class="page">
-        <h1>Page 3: Final Page! ✨</h1>
     </div>
 </body>
-</html>""",
-        height=400,
-        key="html_code"
-    )
-    uploaded_images_text = st.file_uploader(
-        "📷 Upload Images",
-        type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
-        key="image_text",
-        help="Upload images to embed in your HTML",
-        accept_multiple_files=True
     )
-    if uploaded_images_text:
-        st.success(f"✅ {len(uploaded_images_text)} image(s) uploaded")
-        with st.expander("View images"):
-            cols = st.columns(min(len(uploaded_images_text), 4))
-            for idx, img in enumerate(uploaded_images_text):
-                with cols[idx % 4]:
-                    st.image(img, caption=img.name, use_container_width=True)
-    if html_code.strip():
-        detected_ratio_text = detect_aspect_ratio(html_code)
-        auto_detect_text = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_text")
-        if auto_detect_text:
-            aspect_ratio_text = detected_ratio_text
-            st.info(f"🔍 Detected: **{detected_ratio_text}**")
-        else:
-            aspect_ratio_text = st.radio(
-                "Aspect Ratio",
-                options=["16:9", "1:1", "9:16"],
-                index=["16:9", "1:1", "9:16"].index(detected_ratio_text),
-                key="aspect_text"
-            )
-        convert_text_btn = st.button("🔄 Convert", key="conv_text", type="primary", use_container_width=True)
-        if convert_text_btn:
-            temp_dir = None
-            try:
-                with st.spinner("Converting..."):
-                    temp_dir = tempfile.mkdtemp()
-                    processed_html = html_code
-                    if uploaded_images_text:
-                        with st.expander("🖼️ Image Processing", expanded=True):
-                            processed_html, replacements = embed_images_as_base64(html_code, uploaded_images_text)
-                            if not replacements:
-                                st.warning("⚠️ Images uploaded but no matches found!")
-                    pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_text, temp_dir)
-                    if error:
-                        st.error(f"❌ {error}")
-                    else:
-                        st.success("✅ PDF generated with proper page breaks!")
-                        col_a, col_b = st.columns(2)
-                        with col_a:
-                            st.download_button(
-                                "⬇️ Download PDF",
-                                data=pdf_bytes,
-                                file_name="converted.pdf",
-                                mime="application/pdf",
-                                use_container_width=True
-                            )
-                        with col_b:
-                            st.info(f"Size: {len(pdf_bytes):,} bytes")
-                        st.subheader("📄 PDF Preview")
-                        st.components.v1.html(render_pdf_preview(pdf_bytes), height=600, scrolling=True)
-            except Exception as e:
-                st.error(f"❌ Error: {str(e)}")
-            finally:
-                if temp_dir and os.path.exists(temp_dir):
-                    shutil.rmtree(temp_dir, ignore_errors=True)
-# Footer
 st.markdown("---")
-st.markdown("""
-### 💡 How Page Breaks Work:
-**Automatic Page Detection:**
-- Elements with class `page`, `slide`, or `section.page` are treated as separate pages
-- Each page automatically gets `page-break-after: always` CSS
-- Last page won't have a trailing break
-**HTML Structure for Multiple Pages:**
-```html
-<div class="page">Page 1 content</div>
-<div class="page">Page 2 content</div>
-<div class="page">Page 3 content</div>
-```
-**Manual Page Breaks:**
-- Add class `page-break` to force a break after an element
-- Add class `page-break-before` to force a break before an element
-- Add class `no-page-break` to prevent breaks inside an element
-**Image Embedding:**
-- Images are converted to base64 and embedded directly in HTML
-- Ensures images always appear in the PDF
-- Filename in HTML must match uploaded file exactly
-### 📝 Example HTML:
-```html
-<!DOCTYPE html>
-<html>
-<body>
-    <div class="page">
-        <h1>First Page</h1>
-        <img src="logo.png" alt="Logo">
-    </div>
-    <div class="page">
-        <h1>Second Page</h1>
-        <p>Content here...</p>
-    </div>
-</body>
-</html>
-```
-Then upload a file named: `logo.png`
-""")

 """
+Combined Streamlit UI + FastAPI REST API
+Single port solution for Hugging Face Spaces
+Save as: app.py
 """
 import streamlit as st
+from fastapi import FastAPI, File, UploadFile, Form, HTTPException
+from fastapi.responses import Response
+from fastapi.middleware.cors import CORSMiddleware
 import subprocess
 import os
 import tempfile
 import shutil
 import base64
 import re
 import mimetypes
+from typing import List, Optional
+import uvicorn
+import threading
+# Import your existing conversion functions
+import sys
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
+# ============= SHARED CONVERSION FUNCTIONS =============
 def detect_aspect_ratio(html_content):
     """Detect aspect ratio from HTML content"""
 def image_to_base64(image_file):
     """Convert uploaded image to base64 data URL"""
     try:
+        if hasattr(image_file, 'getvalue'):
+            image_bytes = image_file.getvalue()
+            filename = image_file.name
+        else:
+            image_file.file.seek(0)
+            image_bytes = image_file.file.read()
+            filename = image_file.filename
+        mime_type, _ = mimetypes.guess_type(filename)
         if not mime_type:
+            ext = os.path.splitext(filename)[1].lower()
             mime_map = {
                 '.jpg': 'image/jpeg',
                 '.jpeg': 'image/jpeg',
         data_url = f"data:{mime_type};base64,{b64_data}"
         return data_url
     except Exception as e:
+        print(f"Error converting to base64: {str(e)}")
         return None
 def embed_images_as_base64(html_content, uploaded_images):
     for img in uploaded_images:
         data_url = image_to_base64(img)
         if data_url:
+            filename = img.name if hasattr(img, 'name') else img.filename
+            image_data_urls[filename] = data_url
     if not image_data_urls:
         return html_content, {}
     for filename, data_url in image_data_urls.items():
         escaped_name = re.escape(filename)
         pattern1 = rf'(<img[^>]*\s+src\s*=\s*)(["\'])(?:[^"\']*?/)?{escaped_name}\2'
         matches1 = list(re.finditer(pattern1, html_content, flags=re.IGNORECASE | re.DOTALL))
         if matches1:
             html_content = re.sub(pattern1, rf'\1\2{data_url}\2', html_content, flags=re.IGNORECASE | re.DOTALL)
+            replacements[f"{filename} (img)"] = len(matches1)
         pattern2 = rf'(background-image\s*:\s*url\s*\()(["\']?)(?:[^)"\']*/)?{escaped_name}\2(\))'
         matches2 = list(re.finditer(pattern2, html_content, flags=re.IGNORECASE))
         if matches2:
             html_content = re.sub(pattern2, rf'\1"{data_url}"\3', html_content, flags=re.IGNORECASE)
+            replacements[f"{filename} (bg)"] = len(matches2)
         pattern3 = rf'(url\s*\()(["\']?)(?:[^)"\']*/)?{escaped_name}\2(\))'
         matches3 = list(re.finditer(pattern3, html_content, flags=re.IGNORECASE))
         if matches3:
             html_content = re.sub(pattern3, rf'\1"{data_url}"\3', html_content, flags=re.IGNORECASE)
+            replacements[f"{filename} (url)"] = len(matches3)
     return html_content, replacements
 def inject_page_breaks(html_content: str, aspect_ratio: str):
+    """Inject page break CSS"""
+    page_size = "A4 landscape" if aspect_ratio == "16:9" else ("210mm 210mm" if aspect_ratio == "1:1" else "A4 portrait")
     page_css = f"""
     <style id="auto-page-breaks">
+        @page {{ size: {page_size}; margin: 0; }}
+        html, body {{ margin: 0 !important; padding: 0 !important; }}
+        .page, .slide {{ width: 100% !important; min-height: 100vh !important; height: 100vh !important;
+            page-break-after: always !important; break-after: page !important;
+            page-break-inside: avoid !important; break-inside: avoid !important; }}
+        .page:last-child, .slide:last-child {{ page-break-after: auto !important; }}
+        * {{ -webkit-print-color-adjust: exact !important; print-color-adjust: exact !important; }}
     </style>
     """
     if '</head>' in html_content:
         html_content = html_content.replace('</head>', page_css + '</head>')
     elif '<body' in html_content:
     return html_content
 def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
+    """Convert HTML to PDF using Puppeteer"""
     try:
         html_content = inject_page_breaks(html_content, aspect_ratio)
         html_file = os.path.join(temp_dir, "input.html")
         with open(html_file, 'w', encoding='utf-8') as f:
             f.write(html_content)
         script_dir = os.path.dirname(os.path.abspath(__file__))
+        puppeteer_script = os.path.join(script_dir, 'puppeteer_pdf.js')
+        if not os.path.exists(puppeteer_script):
+            return None, f"Error: puppeteer_pdf.js not found"
         result = subprocess.run(
             ['node', puppeteer_script, html_file, aspect_ratio],
             capture_output=True,
             text=True,
             timeout=60,
+            cwd=script_dir
         )
         if result.returncode != 0:
             return None, f"PDF conversion failed: {result.stderr}"
         pdf_file = html_file.replace('.html', '.pdf')
         if not os.path.exists(pdf_file):
             return None, "PDF file was not generated"
         with open(pdf_file, 'rb') as f:
             pdf_bytes = f.read()
         return pdf_bytes, None
     except subprocess.TimeoutExpired:
+        return None, "Error: PDF conversion timed out"
     except Exception as e:
         return None, f"Error: {str(e)}"
+# ============= FASTAPI APP =============
+api_app = FastAPI(title="HTML to PDF API", version="3.0")
+# CORS: any origin may call the API, but without credentials. The CORS spec
+# does not allow a wildcard origin on credentialed requests, so
+# allow_origins=["*"] must not be paired with allow_credentials=True.
+# None of the endpoints visible here read cookies or auth headers, so
+# credentialed cross-origin requests are simply switched off.
+api_app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+@api_app.get("/api/health")
+async def health():
+    """Liveness probe: report that the API is up and which version it runs.
+
+    Returns:
+        A JSON object with ``status`` and ``version``. The version is read
+        from the FastAPI application object so it cannot drift from the
+        value given to ``FastAPI(version=...)``.
+    """
+    return {"status": "healthy", "version": api_app.version}
+@api_app.post("/api/convert")
+async def api_convert(
+    html_file: Optional[UploadFile] = File(None),
+    html_content: Optional[str] = Form(None),
+    aspect_ratio: Optional[str] = Form(None),
+    auto_detect: bool = Form(True),
+    images: Optional[List[UploadFile]] = File(None)
+):
+    """Convert HTML to PDF via API.
+
+    Args:
+        html_file: Uploaded HTML document; takes precedence over html_content.
+        html_content: Raw HTML string, used when no file is uploaded.
+        aspect_ratio: Requested ratio; only honoured when auto_detect is false.
+        auto_detect: When true (the default), or when aspect_ratio is empty,
+            the ratio is taken from detect_aspect_ratio().
+        images: Optional uploads embedded into the HTML as base64 data URLs.
+
+    Returns:
+        A Response with the PDF bytes, served as an attachment.
+
+    Raises:
+        HTTPException: 400 when neither html_file nor html_content is given;
+            500 when conversion reports an error or anything else fails.
+    """
+    # Function-scope import: keeps this handler independent of the file's
+    # top-level import list.
+    from fastapi.concurrency import run_in_threadpool
+
+    temp_dir = None
+    try:
+        if not html_file and not html_content:
+            raise HTTPException(status_code=400, detail="html_file or html_content required")
+        if html_file:
+            content = await html_file.read()
+            html = content.decode('utf-8', errors='replace')
+            # UploadFile.filename is optional; fall back instead of crashing
+            # on None further down.
+            filename = html_file.filename or "converted.pdf"
+        else:
+            html = html_content
+            filename = "converted.pdf"
+        temp_dir = tempfile.mkdtemp()
+        if images:
+            html, _ = embed_images_as_base64(html, images)
+        if auto_detect or not aspect_ratio:
+            aspect_ratio = detect_aspect_ratio(html)
+        # convert_html_to_pdf() blocks on a subprocess; run it in a worker
+        # thread so the event loop can keep serving other requests.
+        pdf_bytes, error = await run_in_threadpool(
+            convert_html_to_pdf, html, aspect_ratio, temp_dir
+        )
+        if error:
+            raise HTTPException(status_code=500, detail=error)
+        # The filename is client-controlled and ends up in a response header:
+        # drop any directory part, swap the extension for .pdf (covers .htm
+        # as well as .html) and keep only header-safe characters.
+        stem = os.path.splitext(os.path.basename(filename.replace('\\', '/')))[0]
+        stem = re.sub(r'[^A-Za-z0-9._-]+', '_', stem).strip('._')
+        output_filename = f"{stem}.pdf" if stem else "converted.pdf"
+        return Response(
+            content=pdf_bytes,
+            media_type="application/pdf",
+            headers={"Content-Disposition": f'attachment; filename="{output_filename}"'}
+        )
+    except HTTPException:
+        # Already carries the intended status code; pass it through untouched.
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        # Always remove the scratch directory, including on failure.
+        if temp_dir and os.path.exists(temp_dir):
+            shutil.rmtree(temp_dir, ignore_errors=True)
+# ============= START FASTAPI IN BACKGROUND =============
+def run_fastapi():
+    """Run FastAPI server in background thread"""
+    uvicorn.run(api_app, host="0.0.0.0", port=8000, log_level="error")
+# Streamlit re-executes this script on every rerun, so starting the thread
+# unconditionally would launch a new server (and a failed bind on port 8000)
+# each time. Reuse the live thread when one with this name already exists.
+_API_THREAD_NAME = "fastapi-background"
+api_thread = next(
+    (t for t in threading.enumerate() if t.name == _API_THREAD_NAME),
+    None,
+)
+if api_thread is None:
+    api_thread = threading.Thread(target=run_fastapi, name=_API_THREAD_NAME, daemon=True)
+    api_thread.start()
+# ============= STREAMLIT UI =============
+st.set_page_config(
+    page_title="HTML to PDF Converter",
+    page_icon="📄",
+    layout="wide",
+)
 st.title("📄 HTML to PDF Converter")
+# Banner pointing users at the REST endpoints
+api_banner = """
+🚀 **API Available!** This space includes a REST API:
+- Health: `GET /api/health`
+- Convert: `POST /api/convert`
+- Example: `curl -X POST https://abdallalswaiti-htmlpdf.hf.space/api/convert -F 'html_content=<html>...</html>' --output out.pdf`
+"""
+st.info(api_banner)
+# One tab per input mode, plus the API reference
+tab_labels = ["📤 Upload HTML", "📝 Paste HTML", "📚 API Docs"]
+tab1, tab2, tab3 = st.tabs(tab_labels)
 with tab1:
+    # Upload flow: an HTML file plus optional images, converted on demand.
+    uploaded_file = st.file_uploader("Choose HTML file", type=['html', 'htm'])
+    uploaded_images = st.file_uploader("📷 Upload Images", type=['jpg', 'jpeg', 'png', 'gif', 'svg'], accept_multiple_files=True)
     if uploaded_file:
+        html_content = uploaded_file.getvalue().decode('utf-8', errors='replace')
         detected_ratio = detect_aspect_ratio(html_content)
+        auto_detect = st.checkbox("Auto-detect aspect ratio", value=True, key="auto1")
+        if auto_detect:
+            aspect_ratio = detected_ratio
+            st.info(f"🔍 Detected: **{detected_ratio}**")
+        else:
+            aspect_ratio = st.radio("Aspect Ratio", ["16:9", "1:1", "9:16"], key="ratio1")
+        if st.button("🔄 Convert to PDF", key="conv1", type="primary"):
+            temp_dir = tempfile.mkdtemp()
             try:
                 with st.spinner("Converting..."):
                     processed_html = html_content
                     if uploaded_images:
+                        processed_html, replacements = embed_images_as_base64(html_content, uploaded_images)
+                        if replacements:
+                            st.success(f"✅ Embedded {len(replacements)} image reference(s)")
                     pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio, temp_dir)
                     if error:
                         st.error(f"❌ {error}")
                     else:
+                        st.success("✅ PDF generated!")
+                        # Replace whatever extension was uploaded with .pdf; the
+                        # uploader accepts both .html and .htm.
+                        pdf_name = os.path.splitext(uploaded_file.name)[0] + '.pdf'
+                        st.download_button(
+                            "⬇️ Download PDF",
+                            data=pdf_bytes,
+                            file_name=pdf_name,
+                            mime="application/pdf"
+                        )
             finally:
+                # Best-effort cleanup; ignore_errors also covers a missing directory.
+                shutil.rmtree(temp_dir, ignore_errors=True)
 with tab2:
+    # Starter document for the editor: two full-viewport ".page" sections.
+    sample_html = """<!DOCTYPE html>
 <html>
 <body>
+    <div class="page" style="height:100vh; display:flex; align-items:center; justify-content:center; background:#667eea; color:white;">
+        <h1>Page 1</h1>
     </div>
+    <div class="page" style="height:100vh; display:flex; align-items:center; justify-content:center; background:#f093fb; color:white;">
+        <h1>Page 2</h1>
     </div>
 </body>
+</html>"""
+    html_code = st.text_area("HTML Content", value=sample_html, height=300)
+    convert_clicked = st.button("🔄 Convert", key="conv2", type="primary")
+    if convert_clicked:
+        temp_dir = tempfile.mkdtemp()
+        try:
+            with st.spinner("Converting..."):
+                # Pasted HTML always uses the detected aspect ratio.
+                aspect_ratio = detect_aspect_ratio(html_code)
+                pdf_bytes, error = convert_html_to_pdf(html_code, aspect_ratio, temp_dir)
+                if not error:
+                    st.success("✅ PDF generated!")
+                    st.download_button(
+                        "⬇️ Download PDF",
+                        data=pdf_bytes,
+                        file_name="converted.pdf",
+                        mime="application/pdf",
+                    )
+                else:
+                    st.error(f"❌ {error}")
+        finally:
+            # Remove the scratch directory whether or not conversion succeeded.
+            if os.path.exists(temp_dir):
+                shutil.rmtree(temp_dir, ignore_errors=True)
+with tab3:
+    # Static API reference. The "With Images" example sends auto_detect=false
+    # because /api/convert replaces aspect_ratio with the detected value
+    # whenever auto_detect is true, which is its default.
+    st.markdown("""
+    ## 📡 REST API Documentation
+    ### Endpoints
+    **Health Check**
+    ```bash
+    curl https://abdallalswaiti-htmlpdf.hf.space/api/health
+    ```
+    **Convert HTML to PDF**
+    ```bash
+    curl -X POST https://abdallalswaiti-htmlpdf.hf.space/api/convert \\
+      -F 'html_content=<html><body><div class="page">Hello</div></body></html>' \\
+      --output output.pdf
+    ```
+    **With Images**
+    ```bash
+    curl -X POST https://abdallalswaiti-htmlpdf.hf.space/api/convert \\
+      -F "html_file=@document.html" \\
+      -F "images=@logo.png" \\
+      -F "auto_detect=false" \\
+      -F "aspect_ratio=16:9" \\
+      --output output.pdf
+    ```
+    ### Python Example
+    ```python
+    import requests
+    response = requests.post(
+        'https://abdallalswaiti-htmlpdf.hf.space/api/convert',
+        data={'html_content': '<html><body><div class="page">Test</div></body></html>'}
     )
+    with open('output.pdf', 'wb') as f:
+        f.write(response.content)
+    ```
+    ### Parameters
+    - `html_file` (file): HTML file upload
+    - `html_content` (string): Raw HTML content
+    - `aspect_ratio` (string): "16:9", "1:1", or "9:16"; only applied when `auto_detect` is false
+    - `auto_detect` (boolean, default true): Detect the ratio from the HTML, overriding `aspect_ratio`
+    - `images` (files): Images to embed
+    """)
 st.markdown("---")
+st.markdown("💡 **Tip:** Use `.page` or `.slide` classes for automatic page breaks")