"""Redact e-mail addresses, phone numbers and labelled names from plain text,
and render plain text into a simple paginated A4 PDF.
"""

import io
import re
from typing import Dict, List, Tuple

from reportlab.lib.pagesizes import A4
from reportlab.lib.units import mm
from reportlab.pdfgen import canvas

# E-mail address: local part, "@", domain, and a TLD of at least two letters.
EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")

# Phone number: optional country code, optional parenthesised area code, then
# three digit groups, each optionally separated by one space or hyphen.
PHONE_RE = re.compile(
    r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}"
)

# A line that starts with the label "氏名" or "Name", optionally followed by a
# colon (ASCII ":" or full-width U+FF1A); the rest of the line is captured as
# the name.  Because \s also matches newlines, a label that stands alone on its
# line captures the following line instead.
NAME_LINE_RE = re.compile(r"^(?:氏名|Name)\s*[:\uff1a]?\s*(.+)$", re.MULTILINE)


def _unique(seq: List[str]) -> List[str]:
    """Return the items of *seq* without duplicates, in first-seen order."""
    return list(dict.fromkeys(seq))


def anonymize_text(text: str) -> Tuple[str, Dict[str, str]]:
    """Replace e-mails, phone numbers and labelled names in *text* with tokens.

    Each distinct value gets its own numbered token (``<EMAIL_1>``,
    ``<PHONE_2>``, ``<NAME_1>``, ...), numbered in order of first appearance
    within its category.  Every occurrence of a value in the text is replaced,
    not only the occurrence that was matched by the pattern.

    Args:
        text: The plain text to anonymize.

    Returns:
        A ``(anonymized_text, replace_map)`` tuple, where ``replace_map`` maps
        each original value to the token that replaced it.
    """
    # NOTE(review): the token spellings below are a reconstruction -- the code
    # previously built empty f-strings while carrying an unused counter, so
    # every value was replaced by "".  Confirm the exact format with consumers.
    replace_map: Dict[str, str] = {}

    for i, email in enumerate(_unique(EMAIL_RE.findall(text)), start=1):
        replace_map[email] = f"<EMAIL_{i}>"

    phones = [match.strip() for match in PHONE_RE.findall(text)]
    for i, phone in enumerate(_unique(phones), start=1):
        replace_map[phone] = f"<PHONE_{i}>"

    names = []
    for match in NAME_LINE_RE.finditer(text):
        name = match.group(1).strip()
        if name:
            names.append(name)
    # Names are registered last, so a name that equals an e-mail or phone key
    # takes over that entry.  Only the first 80 characters of a captured name
    # are used as the search key.
    for i, name in enumerate(_unique(names), start=1):
        replace_map[name[:80]] = f"<NAME_{i}>"

    if replace_map:
        # Longest keys first, so a key that contains a shorter key wins at the
        # same position.  Substituting in a single pass means tokens that were
        # already inserted are never rescanned and rewritten by another key.
        keys = sorted(replace_map, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(key) for key in keys))
        text = pattern.sub(lambda m: replace_map[m.group(0)], text)

    return text, replace_map


def render_anonymized_pdf(text: str) -> bytes:
    """Render *text* as an A4 PDF and return the PDF file contents.

    Every input line becomes at least one row of 10 pt Helvetica; lines longer
    than 110 characters are cut into consecutive 110-character rows.  A new
    page is started whenever the next row would fall below the bottom margin.

    Args:
        text: The text to render, split into rows on line boundaries.

    Returns:
        The bytes of the generated PDF document.
    """
    # NOTE(review): Helvetica is a Latin-only standard PDF font, while the name
    # pattern in this module also targets the Japanese label "氏名" -- confirm
    # how non-Latin input is expected to render.
    # NOTE(review): wrapping counts characters rather than measuring width, so
    # rows of wide glyphs may run past the right margin.
    font_name, font_size = "Helvetica", 10
    max_chars = 110
    _, page_height = A4
    left = 15 * mm
    top = page_height - 15 * mm
    bottom = 20 * mm
    line_h = 6 * mm

    buf = io.BytesIO()
    c = canvas.Canvas(buf, pagesize=A4)
    c.setFont(font_name, font_size)

    y = top
    for line in text.splitlines():
        # An empty line still yields one (empty) row so it keeps its height.
        segments = [
            line[start:start + max_chars]
            for start in range(0, len(line), max_chars)
        ] or [""]
        for segment in segments:
            # Break *before* drawing, so the document never ends with a page
            # that was opened but received no rows.
            if y < bottom:
                c.showPage()
                # The font is set again after every page break.
                c.setFont(font_name, font_size)
                y = top
            c.drawString(left, y, segment)
            y -= line_h

    c.showPage()
    c.save()
    return buf.getvalue()