Spaces:
Running
Running
| import re | |
| import base64 | |
| from typing import List, Tuple | |
# Candidate base64 run: at least 10 characters from the standard base64
# alphabet, optionally followed by up to two '=' padding characters.
BASE64_REGEX = re.compile(r"(?:[A-Za-z0-9+/]{10,}={0,2})")

# Hard safety limit: decoded text longer than this many characters is not
# returned by extract_base64_segments.
MAX_DECODE_LEN = 1_000
def extract_base64_segments(text: str) -> List[Tuple[str, str]]:
    """
    Find base64-looking substrings of ``text`` that decode to UTF-8 text.

    Args:
        text: The string to scan with ``BASE64_REGEX``.

    Returns:
        A list of ``(original_base64, decoded_text)`` tuples in match order.
        Candidates that are not valid base64, that do not decode to valid
        UTF-8, or whose decoded text is longer than ``MAX_DECODE_LEN``
        characters are omitted.
    """
    decoded_segments = []
    for b64 in BASE64_REGEX.findall(text):
        try:
            decoded = base64.b64decode(b64, validate=True).decode("utf-8")
        except ValueError:
            # binascii.Error (malformed base64 / bad padding) and
            # UnicodeDecodeError (bytes are not UTF-8) both subclass
            # ValueError; such candidates are simply not base64 text.
            continue
        if len(decoded) <= MAX_DECODE_LEN:
            decoded_segments.append((b64, decoded))
    return decoded_segments
def replace_base64_with_decoded(text: str, segments: List[Tuple[str, str]]) -> str:
    """
    Replace base64 substrings of ``text`` with their decoded content.

    Args:
        text: The string to rewrite.
        segments: ``(original_base64, decoded_text)`` pairs, e.g. as returned
            by ``extract_base64_segments``.

    Returns:
        ``text`` with every occurrence of each ``original_base64`` replaced
        by its ``decoded_text``. The decoded text is inserted verbatim; no
        marker is added around it.
    """
    # The first pair wins when the same original is listed more than once.
    # Empty originals are skipped: they would match at every position.
    replacements = {}
    for original, decoded in segments:
        if original:
            replacements.setdefault(original, decoded)
    if not replacements:
        return text

    # Substitute in a single pass so that text inserted for one segment is
    # never rescanned and rewritten by another segment. Longest alternatives
    # come first so a longer original is not pre-empted by one of its
    # prefixes.
    ordered = sorted(replacements, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(key) for key in ordered))
    return pattern.sub(lambda match: replacements[match.group(0)], text)