import re
import base64
from typing import List, Tuple

# Candidate base64 runs: at least 10 characters from the standard base64
# alphabet, optionally followed by up to two "=" padding characters.
# Matches are only candidates; they are validated by actually decoding them.
BASE64_REGEX = re.compile(
    r"(?:[A-Za-z0-9+/]{10,}={0,2})"
)

# Hard safety limit on the length (in characters) of a decoded segment.
MAX_DECODE_LEN = 1000


def extract_base64_segments(text: str) -> List[Tuple[str, str]]:
    """Find base64 substrings of *text* that decode to UTF-8 text.

    Every match of ``BASE64_REGEX`` is strictly base64-decoded and then
    decoded as UTF-8. Candidates that fail either step, or whose decoded
    text is longer than ``MAX_DECODE_LEN`` characters, are skipped.

    Args:
        text: The text to scan.

    Returns:
        A list of ``(original_base64, decoded_text)`` tuples in order of
        appearance. A base64 string that occurs several times in *text* is
        reported once per occurrence.
    """
    decoded_segments = []
    for b64 in BASE64_REGEX.findall(text):
        try:
            decoded = base64.b64decode(b64, validate=True).decode("utf-8")
        except ValueError:
            # binascii.Error (malformed base64) and UnicodeDecodeError
            # (payload is not UTF-8) are both ValueError subclasses; such
            # candidates are simply not base64-encoded text.
            continue
        if len(decoded) <= MAX_DECODE_LEN:
            decoded_segments.append((b64, decoded))
    return decoded_segments


def replace_base64_with_decoded(text: str, segments: List[Tuple[str, str]]) -> str:
    """Replace base64 substrings of *text* with their decoded content.

    All replacements happen in a single pass over the original text, so
    content inserted for one segment is never rewritten by another segment,
    and a segment that is a substring of a longer segment cannot corrupt the
    longer one (longer originals are tried first).

    NOTE(review): the decoded text is inserted verbatim, with no surrounding
    marker. If an explicit marker around decoded content is intended, it has
    to be added here -- confirm with the caller's expectations.

    Args:
        text: The text containing base64 substrings.
        segments: ``(original_base64, decoded_text)`` pairs, e.g. as returned
            by ``extract_base64_segments``. Pairs with an empty original are
            ignored; if an original appears more than once, its first
            decoded value is used.

    Returns:
        *text* with every occurrence of each original replaced by its
        decoded text. Returned unchanged when there is nothing to replace.
    """
    replacements = {}
    for original, decoded in segments:
        # An empty search string would match between every character.
        if original:
            replacements.setdefault(original, decoded)

    if not replacements:
        return text

    # Longest alternatives first so the regex prefers the most specific match.
    alternatives = sorted(replacements, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(item) for item in alternatives))
    return pattern.sub(lambda match: replacements[match.group(0)], text)