Spaces:
Sleeping
Sleeping
File size: 1,036 Bytes
ef839aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import re
import base64
from typing import List, Tuple
# Candidate base64 run: at least 10 characters from the standard base64
# alphabet, optionally followed by up to two "=" padding characters.
BASE64_REGEX = re.compile(r"(?:[A-Za-z0-9+/]{10,}={0,2})")

# Hard safety limit: extract_base64_segments discards any decoded text
# longer than this many characters.
MAX_DECODE_LEN = 1000
def extract_base64_segments(text: str) -> List[Tuple[str, str]]:
    """Find base64 runs in *text* and decode those that hold UTF-8 text.

    Every substring matched by ``BASE64_REGEX`` is decoded with strict
    base64 validation. A candidate is skipped when it is not valid base64,
    when its bytes are not valid UTF-8, or when the decoded text is longer
    than ``MAX_DECODE_LEN`` characters.

    Args:
        text: The string to scan.

    Returns:
        A list of ``(original_base64, decoded_text)`` tuples in match order.
        A substring that occurs several times in *text* yields one tuple
        per occurrence.
    """
    decoded_segments = []
    for b64 in BASE64_REGEX.findall(text):
        try:
            decoded = base64.b64decode(b64, validate=True).decode("utf-8")
        except ValueError:
            # binascii.Error (malformed base64) and UnicodeDecodeError
            # (payload is not UTF-8) are both ValueError subclasses.
            # Anything else is a genuine bug and should propagate.
            continue
        if len(decoded) <= MAX_DECODE_LEN:
            decoded_segments.append((b64, decoded))
    return decoded_segments
def replace_base64_with_decoded(text: str, segments: List[Tuple[str, str]]) -> str:
    """Replace each base64 substring in *text* with its decoded content.

    The substitution is done in a single pass over *text*, so text inserted
    by one replacement is never re-scanned and rewritten by another. Longer
    originals are tried first, so a segment that is a prefix or substring of
    a longer segment cannot clobber it. The decoded text is inserted
    verbatim; no marker or escaping is added.

    Args:
        text: The string containing base64 substrings.
        segments: ``(original_base64, decoded_text)`` pairs. If the same
            original appears more than once, the first pair wins. Pairs
            with an empty original are ignored.

    Returns:
        *text* with every occurrence of each original replaced by its
        decoded text, or *text* unchanged when there is nothing to replace.
    """
    replacements = {}
    for original, decoded in segments:
        # An empty needle would match between every character of the text.
        if original:
            replacements.setdefault(original, decoded)

    if not replacements:
        return text

    # Regex alternation picks the first alternative that matches, so
    # ordering by length (longest first) gives longest-match semantics.
    ordered = sorted(replacements, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(original) for original in ordered))

    # A callable replacement inserts the decoded text literally, without
    # interpreting backslashes or group references.
    return pattern.sub(lambda match: replacements[match.group(0)], text)
|