File size: 1,036 Bytes
ef839aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import re
import base64
from typing import List, Tuple

# Candidate base64 run: ten or more characters from the standard base64
# alphabet, optionally followed by up to two "=" padding characters.
BASE64_REGEX = re.compile(r"(?:[A-Za-z0-9+/]{10,}={0,2})")

# Hard safety limit on the length of a decoded string.
MAX_DECODE_LEN = 1000


def extract_base64_segments(text: str) -> List[Tuple[str, str]]:
    """
    Find base64-looking substrings in ``text`` and decode them as UTF-8.

    Candidates are every non-overlapping match of ``BASE64_REGEX``. A
    candidate is kept only when it is strictly valid base64, its bytes are
    valid UTF-8, and the decoded text is at most ``MAX_DECODE_LEN``
    characters long. Everything else is skipped silently.

    Returns list of (original_base64, decoded_text), in match order. A
    substring that occurs several times in ``text`` appears several times.
    """
    decoded_segments = []

    for candidate in BASE64_REGEX.findall(text):
        try:
            decoded = base64.b64decode(candidate, validate=True).decode("utf-8")
        except ValueError:
            # binascii.Error (malformed base64) and UnicodeDecodeError
            # (bytes are not UTF-8) are both ValueError subclasses.
            continue  # not decodable text → ignore

        if len(decoded) <= MAX_DECODE_LEN:
            decoded_segments.append((candidate, decoded))

    return decoded_segments


def replace_base64_with_decoded(text: str, segments: List[Tuple[str, str]]) -> str:
    """
    Replace each base64 substring in ``text`` with its decoded content.

    ``segments`` is a list of (original_base64, decoded_text) pairs. Every
    occurrence of an original is replaced by its decoded text, inserted
    verbatim (no marker is added around it). If the same original appears in
    more than one pair, the first pair wins. Pairs with an empty original are
    ignored.

    All replacements happen in a single pass over ``text``, so decoded output
    is never scanned again and cannot be replaced a second time by another
    segment.

    Returns the text with the replacements applied; ``text`` is returned
    unchanged when there is nothing to replace.
    """
    replacements = {}
    for original, decoded in segments:
        if original:  # an empty needle would match between every character
            replacements.setdefault(original, decoded)

    if not replacements:
        return text

    # Longest first, so a segment that is a prefix of a longer segment cannot
    # shadow it in the alternation.
    alternatives = sorted(replacements, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(item) for item in alternatives))

    # A callable replacement keeps backslashes in decoded text literal.
    return pattern.sub(lambda match: replacements[match.group(0)], text)