htmlpdfs

Sleeping

App Files Community

ABDALLALSWAITI committed on Oct 17, 2025

Commit

83a36bb

verified ·

1 Parent(s): f5befe6

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -50

app.py CHANGED Viewed

@@ -60,47 +60,67 @@ def normalize_image_paths(html_content):
     """Replace complex image paths with just filenames"""
     replacements = {}
-    # Pattern 1: img src with paths - extract filename only
-    pattern1 = r'(<img[^>]*\s+src\s*=\s*)(["\'])([^"\']*?/)?([^/"\'>]+\.(jpg|jpeg|png|gif|svg|webp|bmp))(\2)'
     def replace_img_src(match):
         prefix = match.group(1)
         quote = match.group(2)
         filename = match.group(4)
-        replacements[f"img src: {match.group(0)}"] = filename
-        return f'{prefix}{quote}{filename}{quote}'
-    html_content = re.sub(pattern1, replace_img_src, html_content, flags=re.IGNORECASE)
-    # Pattern 2: background-image with paths
-    pattern2 = r'(background-image\s*:\s*url\s*\()(["\']?)([^)"\']*/)?([^/")\']+\.(jpg|jpeg|png|gif|svg|webp|bmp))(\2)(\))'
     def replace_bg_image(match):
         prefix = match.group(1)
         quote = match.group(2)
         filename = match.group(4)
         suffix = match.group(7)
-        replacements[f"bg-image: {match.group(0)}"] = filename
-        return f'{prefix}{quote}{filename}{quote}{suffix}'
-    html_content = re.sub(pattern2, replace_bg_image, html_content, flags=re.IGNORECASE)
-    # Pattern 3: CSS url() with paths
-    pattern3 = r'(url\s*\()(["\']?)([^)"\']*/)?([^/")\']+\.(jpg|jpeg|png|gif|svg|webp|bmp))(\2)(\))'
     def replace_url(match):
-        # Skip if already processed by background-image pattern
-        if 'background-image' in html_content[max(0, match.start()-50):match.start()]:
-            return match.group(0)
         prefix = match.group(1)
         quote = match.group(2)
         filename = match.group(4)
         suffix = match.group(7)
-        replacements[f"url: {match.group(0)}"] = filename
-        return f'{prefix}{quote}{filename}{quote}{suffix}'
-    html_content = re.sub(pattern3, replace_url, html_content, flags=re.IGNORECASE)
     return html_content, replacements
@@ -205,7 +225,7 @@ def inject_page_breaks(html_content: str, aspect_ratio: str):
     return html_content
-def convert_html_to_pdf(html_content, aspect_ratio, temp_dir, images=None):
     """Convert HTML content to PDF using Puppeteer"""
     try:
         # Normalize image paths in HTML
@@ -220,11 +240,11 @@ def convert_html_to_pdf(html_content, aspect_ratio, temp_dir, images=None):
             f.write(html_content)
         # Save image files to the same directory
-        if images:
-            for img in images:
-                img_path = os.path.join(temp_dir, img.filename)
                 with open(img_path, 'wb') as f:
-                    f.write(img.file.read())
         # Find puppeteer script
         possible_paths = [
@@ -242,12 +262,13 @@ def convert_html_to_pdf(html_content, aspect_ratio, temp_dir, images=None):
         if not puppeteer_script:
             raise Exception("puppeteer_pdf.js not found")
         result = subprocess.run(
             ['node', puppeteer_script, html_file, aspect_ratio],
             capture_output=True,
             text=True,
             timeout=60,
-            cwd=temp_dir  # Run in temp directory so images are accessible
         )
         if result.returncode != 0:
@@ -320,35 +341,22 @@ async def convert_to_pdf(
         if aspect_ratio not in ["16:9", "1:1", "9:16"]:
             raise HTTPException(status_code=400, detail="Invalid aspect ratio. Must be 16:9, 1:1, or 9:16")
-        # Create temp directory and convert
         temp_dir = tempfile.mkdtemp()
-        # Read images into memory before conversion
-        images_list = []
         if images:
             for img in images:
                 img_bytes = await img.read()
-                # Create a simple object to hold filename and bytes
-                class ImageFile:
-                    def __init__(self, filename, content):
-                        self.filename = filename
-                        self.content = content
-                        self.file = None
-                    def get_bytes(self):
-                        return self.content
-                img_obj = ImageFile(img.filename, img_bytes)
-                # Create a file-like object for backwards compatibility
-                import io
-                img_obj.file = io.BytesIO(img_bytes)
-                images_list.append(img_obj)
         pdf_bytes, path_replacements = convert_html_to_pdf(
             html_content,
             aspect_ratio,
             temp_dir,
-            images_list if images_list else None
         )
         # Return PDF
@@ -371,8 +379,8 @@ async def convert_to_pdf(
         if temp_dir and os.path.exists(temp_dir):
             shutil.rmtree(temp_dir, ignore_errors=True)
-@app.post("/convert-base64")
-async def convert_to_pdf_base64(
     html_content: str = Form(..., description="HTML content as string"),
     aspect_ratio: Optional[str] = Form(None, description="Aspect ratio: 16:9, 1:1, or 9:16"),
     auto_detect: bool = Form(True, description="Auto-detect aspect ratio from HTML")
@@ -399,7 +407,7 @@ async def convert_to_pdf_base64(
         # Create temp directory and convert
         temp_dir = tempfile.mkdtemp()
-        pdf_bytes, path_replacements = convert_html_to_pdf(html_content, aspect_ratio, temp_dir)
         return Response(
             content=pdf_bytes,

     """Replace complex image paths with just filenames"""
     replacements = {}
+    # Pattern for img src with paths
     def replace_img_src(match):
+        full_match = match.group(0)
         prefix = match.group(1)
         quote = match.group(2)
+        path = match.group(3) if match.group(3) else ""
         filename = match.group(4)
+        if path:  # Only replace if there's a path
+            replacements[f"img: {path}{filename}"] = filename
+            return f'{prefix}{quote}{filename}{quote}'
+        return full_match
+    html_content = re.sub(
+        r'(<img[^>]*\s+src\s*=\s*)(["\'])([^"\']*?/)?([^/"\'>]+\.(jpg|jpeg|png|gif|svg|webp|bmp|JPG|JPEG|PNG|GIF|SVG|WEBP|BMP))(\2)',
+        replace_img_src,
+        html_content,
+        flags=re.IGNORECASE
+    )
+    # Pattern for background-image
     def replace_bg_image(match):
+        full_match = match.group(0)
         prefix = match.group(1)
         quote = match.group(2)
+        path = match.group(3) if match.group(3) else ""
         filename = match.group(4)
         suffix = match.group(7)
+        if path:  # Only replace if there's a path
+            replacements[f"bg: {path}{filename}"] = filename
+            return f'{prefix}{quote}{filename}{quote}{suffix}'
+        return full_match
+    html_content = re.sub(
+        r'(background-image\s*:\s*url\s*\()(["\']?)([^)"\']*/)?([^/")\']+\.(jpg|jpeg|png|gif|svg|webp|bmp|JPG|JPEG|PNG|GIF|SVG|WEBP|BMP))(\2)(\))',
+        replace_bg_image,
+        html_content,
+        flags=re.IGNORECASE
+    )
+    # Pattern for CSS url()
     def replace_url(match):
+        full_match = match.group(0)
         prefix = match.group(1)
         quote = match.group(2)
+        path = match.group(3) if match.group(3) else ""
         filename = match.group(4)
         suffix = match.group(7)
+        if path:  # Only replace if there's a path
+            replacements[f"url: {path}{filename}"] = filename
+            return f'{prefix}{quote}{filename}{quote}{suffix}'
+        return full_match
+    html_content = re.sub(
+        r'(url\s*\()(["\']?)([^)"\']*/)?([^/")\']+\.(jpg|jpeg|png|gif|svg|webp|bmp|JPG|JPEG|PNG|GIF|SVG|WEBP|BMP))(\2)(\))',
+        replace_url,
+        html_content,
+        flags=re.IGNORECASE
+    )
     return html_content, replacements
     return html_content
+def convert_html_to_pdf(html_content, aspect_ratio, temp_dir, image_files=None):
     """Convert HTML content to PDF using Puppeteer"""
     try:
         # Normalize image paths in HTML
             f.write(html_content)
         # Save image files to the same directory
+        if image_files:
+            for filename, img_bytes in image_files.items():
+                img_path = os.path.join(temp_dir, filename)
                 with open(img_path, 'wb') as f:
+                    f.write(img_bytes)
         # Find puppeteer script
         possible_paths = [
         if not puppeteer_script:
             raise Exception("puppeteer_pdf.js not found")
+        # Run Puppeteer
         result = subprocess.run(
             ['node', puppeteer_script, html_file, aspect_ratio],
             capture_output=True,
             text=True,
             timeout=60,
+            cwd=os.path.dirname(os.path.abspath(puppeteer_script))
         )
         if result.returncode != 0:
         if aspect_ratio not in ["16:9", "1:1", "9:16"]:
             raise HTTPException(status_code=400, detail="Invalid aspect ratio. Must be 16:9, 1:1, or 9:16")
+        # Create temp directory
         temp_dir = tempfile.mkdtemp()
+        # Read images into dictionary
+        image_files = {}
         if images:
             for img in images:
                 img_bytes = await img.read()
+                image_files[img.filename] = img_bytes
+        # Convert to PDF
         pdf_bytes, path_replacements = convert_html_to_pdf(
             html_content,
             aspect_ratio,
             temp_dir,
+            image_files
         )
         # Return PDF
         if temp_dir and os.path.exists(temp_dir):
             shutil.rmtree(temp_dir, ignore_errors=True)
+@app.post("/convert-string")
+async def convert_string_to_pdf(
     html_content: str = Form(..., description="HTML content as string"),
     aspect_ratio: Optional[str] = Form(None, description="Aspect ratio: 16:9, 1:1, or 9:16"),
     auto_detect: bool = Form(True, description="Auto-detect aspect ratio from HTML")
         # Create temp directory and convert
         temp_dir = tempfile.mkdtemp()
+        pdf_bytes, path_replacements = convert_html_to_pdf(html_content, aspect_ratio, temp_dir, None)
         return Response(
             content=pdf_bytes,