Spaces:
Runtime error
Runtime error
| import re | |
| from typing import Dict, Tuple, List | |
| from reportlab.pdfgen import canvas | |
| from reportlab.lib.pagesizes import A4 | |
| from reportlab.lib.units import mm | |
| import io | |
# E-mail address: local part, "@", domain, and a final dot-separated label of
# at least two ASCII letters.
EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")

# Phone number: optional country prefix (optional "+", 1-3 digits), optional
# parenthesised 2-4 digit code, then three digit groups (2-4, 2-4, 3-4 digits),
# each part optionally separated by a single space or hyphen.
PHONE_RE = re.compile(r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}")

# A line starting with the label "氏名" or "Name", an optional colon, and the
# rest of the line captured as the name (group 1).
# The colon class accepts both the ASCII colon and the full-width colon
# (U+FF1A) used in Japanese text; the latter is written as an escape so it
# cannot be silently folded into an ASCII colon by Unicode normalization.
NAME_LINE_RE = re.compile(r"^(?:氏名|Name)\s*[:\uFF1A]?\s*(.+)$", re.MULTILINE)
| def _unique(seq: List[str]) -> List[str]: | |
| s = set(); out = [] | |
| for x in seq: | |
| if x not in s: | |
| s.add(x); out.append(x) | |
| return out | |
def anonymize_text(text: str) -> Tuple[str, Dict[str, str]]:
    """Replace e-mail addresses, phone numbers and labelled names with placeholders.

    E-mails and phone numbers are found with ``EMAIL_RE`` / ``PHONE_RE``; names
    are taken from lines matched by ``NAME_LINE_RE`` (only the first 80
    characters of a captured name are used as the search key). Each distinct
    value gets a numbered placeholder: ``<EMAIL_n>``, ``<PHONE_n>``,
    ``<NAME_n>``, numbered from 1 in order of first appearance.

    Args:
        text: The text to anonymize.

    Returns:
        A tuple ``(anonymized_text, replace_map)`` where ``replace_map`` maps
        every original value to the placeholder that replaced it.
    """
    replace_map: Dict[str, str] = {}

    for i, email in enumerate(_unique(EMAIL_RE.findall(text)), start=1):
        replace_map[email] = f"<EMAIL_{i}>"

    phones = _unique([p.strip() for p in PHONE_RE.findall(text)])
    for i, phone in enumerate(phones, start=1):
        replace_map[phone] = f"<PHONE_{i}>"

    names = [m.group(1).strip() for m in NAME_LINE_RE.finditer(text)]
    for i, name in enumerate(_unique([n for n in names if n]), start=1):
        replace_map[name[:80]] = f"<NAME_{i}>"

    if not replace_map:
        return text, replace_map

    # Substitute everything in ONE pass. Chained str.replace() calls rescan
    # text that already contains placeholders, so a short key (e.g. a name
    # "A") would corrupt "<EMAIL_1>" into "<EM<NAME_1>IL_1>". With a single
    # regex pass, inserted placeholders are never examined again.
    # Longest keys come first in the alternation so that, at any position,
    # a longer value wins over a shorter one it contains.
    keys = sorted(replace_map, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(k) for k in keys))
    anonymized = pattern.sub(lambda m: replace_map[m.group(0)], text)
    return anonymized, replace_map
def render_anonymized_pdf(text: str) -> bytes:
    """Render *text* as a plain A4 PDF and return the PDF file contents.

    Each input line is drawn in Helvetica 10 pt starting 15 mm from the left
    and top edges, with a 6 mm line pitch. Lines longer than 110 characters
    are hard-wrapped into 110-character segments. A new page is started
    whenever the next row would fall below 20 mm from the bottom edge.

    Args:
        text: The text to render; it is split with ``str.splitlines()``.

    Returns:
        The generated PDF document as bytes.
    """
    buf = io.BytesIO()
    c = canvas.Canvas(buf, pagesize=A4)
    _, height = A4

    font_name, font_size = "Helvetica", 10
    left = 15 * mm
    top = height - 15 * mm
    bottom = 20 * mm
    line_h = 6 * mm
    max_chars = 110

    c.setFont(font_name, font_size)
    y = top
    for line in text.splitlines():
        # max(len, 1) makes an empty line yield one empty segment, so blank
        # lines still consume a row.
        for start in range(0, max(len(line), 1), max_chars):
            # Make room BEFORE drawing. Breaking the page after a draw would
            # open a fresh page that, if nothing else follows, is emitted as
            # a blank trailing page by the final showPage() below.
            if y < bottom:
                c.showPage()
                c.setFont(font_name, font_size)  # font is reset on each new page
                y = top
            c.drawString(left, y, line[start:start + max_chars])
            y -= line_h

    c.showPage()
    c.save()
    return buf.getvalue()