ABDALLALSWAITI committed on
Commit
7b257b3
·
verified ·
1 Parent(s): 24367b7

Update api.py

Browse files
Files changed (1) hide show
  1. api.py +12 -15
api.py CHANGED
@@ -57,35 +57,32 @@ def process_html_with_images(html_content: str, temp_dir: str, image_mapping: di
57
  absolute_path = os.path.abspath(os.path.join(temp_dir, relative_path))
58
  file_url = f"file://{absolute_path}"
59
 
 
 
 
60
  # Replace various image reference patterns
61
- # Pattern 1: src="filename"
62
- html_content = re.sub(
63
- rf'src=["\'](?:\./)?{re.escape(original_name)}["\']',
64
- f'src="{file_url}"',
65
- html_content,
66
- flags=re.IGNORECASE
67
- )
68
 
69
- # Pattern 2: src='filename'
70
  html_content = re.sub(
71
- rf"src=['\"](?:\./)?{re.escape(original_name)}['\"]",
72
- f'src="{file_url}"',
73
  html_content,
74
  flags=re.IGNORECASE
75
  )
76
 
77
- # Pattern 3: background-image: url(filename)
78
  html_content = re.sub(
79
- rf'url\(["\']?(?:\./)?{re.escape(original_name)}["\']?\)',
80
  f'url("{file_url}")',
81
  html_content,
82
  flags=re.IGNORECASE
83
  )
84
 
85
- # Pattern 4: href for links
86
  html_content = re.sub(
87
- rf'href=["\'](?:\./)?{re.escape(original_name)}["\']',
88
- f'href="{file_url}"',
89
  html_content,
90
  flags=re.IGNORECASE
91
  )
 
57
  absolute_path = os.path.abspath(os.path.join(temp_dir, relative_path))
58
  file_url = f"file://{absolute_path}"
59
 
60
+ # Escape the filename for regex
61
+ escaped_name = re.escape(original_name)
62
+
63
  # Replace various image reference patterns
64
+ # Match filename with or without directory paths (images/, src/images/, ./images/, etc.)
 
 
 
 
 
 
65
 
66
+ # Pattern 1: src with any path prefix
67
  html_content = re.sub(
68
+ rf'src=(["\'])(?:[^"\']*/)?' + escaped_name + r'\1',
69
+ f'src=\\1{file_url}\\1',
70
  html_content,
71
  flags=re.IGNORECASE
72
  )
73
 
74
+ # Pattern 2: url() with any path prefix
75
  html_content = re.sub(
76
+ rf'url\((["\']?)(?:[^)"\']*/)?{escaped_name}\1\)',
77
  f'url("{file_url}")',
78
  html_content,
79
  flags=re.IGNORECASE
80
  )
81
 
82
+ # Pattern 3: href with any path prefix
83
  html_content = re.sub(
84
+ rf'href=(["\'])(?:[^"\']*/)?' + escaped_name + r'\1',
85
+ f'href=\\1{file_url}\\1',
86
  html_content,
87
  flags=re.IGNORECASE
88
  )