Spaces:
Sleeping
Sleeping
Update api.py
Browse files
api.py
CHANGED
|
@@ -57,35 +57,32 @@ def process_html_with_images(html_content: str, temp_dir: str, image_mapping: di
|
|
| 57 |
absolute_path = os.path.abspath(os.path.join(temp_dir, relative_path))
|
| 58 |
file_url = f"file://{absolute_path}"
|
| 59 |
|
|
|
|
|
|
|
|
|
|
| 60 |
# Replace various image reference patterns
|
| 61 |
-
#
|
| 62 |
-
html_content = re.sub(
|
| 63 |
-
rf'src=["\'](?:\./)?{re.escape(original_name)}["\']',
|
| 64 |
-
f'src="{file_url}"',
|
| 65 |
-
html_content,
|
| 66 |
-
flags=re.IGNORECASE
|
| 67 |
-
)
|
| 68 |
|
| 69 |
-
# Pattern
|
| 70 |
html_content = re.sub(
|
| 71 |
-
rf
|
| 72 |
-
f'src
|
| 73 |
html_content,
|
| 74 |
flags=re.IGNORECASE
|
| 75 |
)
|
| 76 |
|
| 77 |
-
# Pattern
|
| 78 |
html_content = re.sub(
|
| 79 |
-
rf'url\(["\']?
|
| 80 |
f'url("{file_url}")',
|
| 81 |
html_content,
|
| 82 |
flags=re.IGNORECASE
|
| 83 |
)
|
| 84 |
|
| 85 |
-
# Pattern
|
| 86 |
html_content = re.sub(
|
| 87 |
-
rf'href=["\']
|
| 88 |
-
f'href
|
| 89 |
html_content,
|
| 90 |
flags=re.IGNORECASE
|
| 91 |
)
|
|
|
|
| 57 |
absolute_path = os.path.abspath(os.path.join(temp_dir, relative_path))
|
| 58 |
file_url = f"file://{absolute_path}"
|
| 59 |
|
| 60 |
+
# Escape the filename for regex
|
| 61 |
+
escaped_name = re.escape(original_name)
|
| 62 |
+
|
| 63 |
# Replace various image reference patterns
|
| 64 |
+
# Match filename with or without directory paths (images/, src/images/, ./images/, etc.)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
+
# Pattern 1: src with any path prefix
|
| 67 |
html_content = re.sub(
|
| 68 |
+
rf'src=(["\'])(?:[^"\']*/)?' + escaped_name + r'\1',
|
| 69 |
+
f'src=\\1{file_url}\\1',
|
| 70 |
html_content,
|
| 71 |
flags=re.IGNORECASE
|
| 72 |
)
|
| 73 |
|
| 74 |
+
# Pattern 2: url() with any path prefix
|
| 75 |
html_content = re.sub(
|
| 76 |
+
rf'url\((["\']?)(?:[^)"\']*/)?{escaped_name}\1\)',
|
| 77 |
f'url("{file_url}")',
|
| 78 |
html_content,
|
| 79 |
flags=re.IGNORECASE
|
| 80 |
)
|
| 81 |
|
| 82 |
+
# Pattern 3: href with any path prefix
|
| 83 |
html_content = re.sub(
|
| 84 |
+
rf'href=(["\'])(?:[^"\']*/)?' + escaped_name + r'\1',
|
| 85 |
+
f'href=\\1{file_url}\\1',
|
| 86 |
html_content,
|
| 87 |
flags=re.IGNORECASE
|
| 88 |
)
|