Spaces:

ABDALLALSWAITI
/

htmlpdf

Sleeping

ABDALLALSWAITI committed on Oct 16, 2025

Commit

7b257b3

verified ·

1 Parent(s): 24367b7

Update api.py

Files changed (1) hide show

api.py CHANGED Viewed

@@ -57,35 +57,32 @@ def process_html_with_images(html_content: str, temp_dir: str, image_mapping: di
         absolute_path = os.path.abspath(os.path.join(temp_dir, relative_path))
         file_url = f"file://{absolute_path}"
         # Replace various image reference patterns
-        # Pattern 1: src="filename"
-        html_content = re.sub(
-            rf'src=["\'](?:\./)?{re.escape(original_name)}["\']',
-            f'src="{file_url}"',
-            html_content,
-            flags=re.IGNORECASE
-        )
-        # Pattern 2: src='filename'
         html_content = re.sub(
-            rf"src=['\"](?:\./)?{re.escape(original_name)}['\"]",
-            f'src="{file_url}"',
             html_content,
             flags=re.IGNORECASE
         )
-        # Pattern 3: background-image: url(filename)
         html_content = re.sub(
-            rf'url\(["\']?(?:\./)?{re.escape(original_name)}["\']?\)',
             f'url("{file_url}")',
             html_content,
             flags=re.IGNORECASE
         )
-        # Pattern 4: href for links
         html_content = re.sub(
-            rf'href=["\'](?:\./)?{re.escape(original_name)}["\']',
-            f'href="{file_url}"',
             html_content,
             flags=re.IGNORECASE
         )

         absolute_path = os.path.abspath(os.path.join(temp_dir, relative_path))
         file_url = f"file://{absolute_path}"
+        # Escape the filename for regex
+        escaped_name = re.escape(original_name)
         # Replace various image reference patterns
+        # Match filename with or without directory paths (images/, src/images/, ./images/, etc.)
+        # Pattern 1: src with any path prefix
         html_content = re.sub(
+            rf'src=(["\'])(?:[^"\']*/)?' + escaped_name + r'\1',
+            f'src=\\1{file_url}\\1',
             html_content,
             flags=re.IGNORECASE
         )
+        # Pattern 2: url() with any path prefix
         html_content = re.sub(
+            rf'url\((["\']?)(?:[^)"\']*/)?{escaped_name}\1\)',
             f'url("{file_url}")',
             html_content,
             flags=re.IGNORECASE
         )
+        # Pattern 3: href with any path prefix
         html_content = re.sub(
+            rf'href=(["\'])(?:[^"\']*/)?' + escaped_name + r'\1',
+            f'href=\\1{file_url}\\1',
             html_content,
             flags=re.IGNORECASE
         )