# Spaces: Runtime error
# File size: 2,128 Bytes
import re
from typing import Dict, Tuple, List
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import mm
import io
# E-mail addresses: a local part, "@", a domain, and a final label of 2+ letters.
EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
# Phone-number-like digit runs: an optional "+"/1-3 digit prefix, an optional
# parenthesised 2-4 digit group, then three digit groups (2-4, 2-4, 3-4 digits)
# each optionally separated by a single space or hyphen.
PHONE_RE = re.compile(r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}")
# Captures whatever follows a line-initial "氏名" or "Name" label, with an
# optional ASCII or full-width colon in between.
# NOTE(review): there is no word boundary after the label, so a line starting
# with e.g. "Names" also matches, and the \s* around the colon can span a
# newline and capture the following line -- confirm this is intended.
NAME_LINE_RE = re.compile(r"^(?:氏名|Name)\s*[::]?\s*(.+)$", re.MULTILINE)
def _unique(seq: List[str]) -> List[str]:
s = set(); out = []
for x in seq:
if x not in s:
s.add(x); out.append(x)
return out
def anonymize_text(text: str) -> Tuple[str, Dict[str, str]]:
    """Replace e-mail addresses, phone numbers and labelled names with placeholders.

    E-mails and phone numbers are found with ``EMAIL_RE`` / ``PHONE_RE``;
    names are whatever ``NAME_LINE_RE`` captures after a name label. Each
    distinct value gets a numbered placeholder (``<EMAIL_1>``, ``<PHONE_1>``,
    ``<NAME_1>``, ...) in order of first appearance, and every occurrence of
    that value anywhere in *text* is substituted.

    Args:
        text: The input text to anonymize.

    Returns:
        A ``(anonymized_text, replace_map)`` tuple, where ``replace_map`` maps
        each original string to the placeholder that replaced it. Name keys
        are truncated to their first 80 characters.
    """
    replace_map: Dict[str, str] = {}
    for i, e in enumerate(_unique(EMAIL_RE.findall(text)), start=1):
        replace_map[e] = f"<EMAIL_{i}>"
    for i, p in enumerate(_unique([p.strip() for p in PHONE_RE.findall(text)]), start=1):
        replace_map[p] = f"<PHONE_{i}>"

    names = []
    for m in NAME_LINE_RE.finditer(text):
        nm = m.group(1).strip()
        if nm:
            names.append(nm)
    for i, n in enumerate(_unique(names), start=1):
        # Only the first 80 characters of a captured name are used as the key.
        replace_map[n[:80]] = f"<NAME_{i}>"

    # Longest keys first so a key that contains a shorter key wins at the same
    # position. Empty keys are skipped: they would match at every position.
    keys = [k for k in sorted(replace_map, key=len, reverse=True) if k]
    if not keys:
        return text, replace_map

    # Substitute in ONE pass over the original text. Chained str.replace()
    # calls would rescan placeholders inserted by earlier iterations, so a
    # short key such as the name "E" or "1" would corrupt "<EMAIL_1>".
    pattern = re.compile("|".join(re.escape(k) for k in keys))
    text = pattern.sub(lambda m: replace_map[m.group(0)], text)
    return text, replace_map
def render_anonymized_pdf(text: str) -> bytes:
    """Render *text* as a plain single-column A4 PDF and return the PDF bytes.

    Each input line is drawn in 10pt Helvetica starting 15 mm from the left
    and top edges, with 6 mm between baselines. Lines longer than 110
    characters are hard-wrapped at 110-character boundaries (no word
    awareness). A new page is started whenever the next baseline would fall
    below 20 mm from the bottom edge.

    Args:
        text: The text to render; it is split with ``str.splitlines()``.

    Returns:
        The complete PDF document as bytes. Empty input yields a single
        blank page.
    """
    # NOTE(review): Helvetica is one of the standard Latin PDF fonts; non-Latin
    # text (e.g. Japanese) presumably needs a registered CID/TTF font -- confirm.
    font_name, font_size = "Helvetica", 10
    max_chars = 110  # hard-wrap width in characters
    _, height = A4
    left = 15 * mm
    top = height - 15 * mm
    bottom = 20 * mm
    line_h = 6 * mm

    buf = io.BytesIO()
    c = canvas.Canvas(buf, pagesize=A4)
    c.setFont(font_name, font_size)
    y = top
    for line in text.splitlines():
        # An empty line still yields one (empty) segment so that blank lines
        # keep their vertical spacing.
        segments = [line[i:i + max_chars] for i in range(0, len(line), max_chars)] or [""]
        for seg in segments:
            # Break the page BEFORE drawing, never after: breaking after the
            # last segment of the document would leave a spurious blank page
            # once the final showPage() below runs.
            if y < bottom:
                c.showPage()
                c.setFont(font_name, font_size)  # re-apply the font on the new page
                y = top
            c.drawString(left, y, seg)
            y -= line_h
    c.showPage()
    c.save()
    return buf.getvalue()
|