Spaces:
Sleeping
Sleeping
| """Rendering HTML per i 3 tab di output (Evidenziato / Anonimizzato / Report).""" | |
| import html | |
| import re as _re | |
| from presidio_analyzer import RecognizerResult | |
| from config import LABEL_IT, SEVERITY_COLORS, SEVERITY_LABELS, SEVERITY_ORDER, get_severity | |
# Inline CSS for the outer container <div> that wraps every rendered tab
# (applied by _box()).
_BOX = (
    "border:1px solid #e5e7eb; border-radius:10px; "
    "padding:20px 24px; background:#ffffff; "
    "min-height:200px; box-sizing:border-box;"
)
# Font stack shared by all generated markup.
_FONT = "Arial, Helvetica, sans-serif"
# Inline CSS for the text body of the highlighted / anonymized tabs.
# "white-space:pre-wrap" keeps the line breaks of the input text.
_TEXT = f"font-family:{_FONT}; font-size:0.95em; line-height:1.95; white-space:pre-wrap; word-wrap:break-word; color:#111827;"
# Matches a bracketed placeholder made of upper-case letters/underscores with
# an optional trailing "_<digits>" part, e.g. "[NAME]" or "[NAME_2]".
_PH_RE = _re.compile(r"\[[A-Z_]+(?:_\d+)?\]")
def _src(r: RecognizerResult) -> str:
    """Return the short name of the detection layer that produced *r*.

    Reads ``source_priority`` from the result's recognition metadata and maps
    0 / 1 / 2 to ``"REGEX"`` / ``"NER"`` / ``"GLINER"``. Missing metadata, a
    missing key, or any other value yields ``"?"``.
    """
    metadata = r.recognition_metadata or {}
    layer_names = {0: "REGEX", 1: "NER", 2: "GLINER"}
    return layer_names.get(metadata.get("source_priority", -1), "?")
def _boosted(r: RecognizerResult) -> bool:
    """Return True when the result's metadata carries a truthy ``post_boost`` flag.

    Missing metadata or a missing key counts as not boosted.
    """
    metadata = r.recognition_metadata or {}
    flag = metadata.get("post_boost", False)
    return bool(flag)
def _agree(r: RecognizerResult) -> int:
    """Return the ``cross_layer_agreement`` value from the metadata as an int.

    Missing metadata or a missing key yields 0.
    """
    metadata = r.recognition_metadata or {}
    raw_count = metadata.get("cross_layer_agreement", 0)
    return int(raw_count)
def _badge(r: RecognizerResult) -> str:
    """Return the display label for the result's entity type.

    The label is looked up in ``LABEL_IT`` and falls back to the raw entity
    type. A trailing ``+`` is appended when the result is boosted.
    """
    name = LABEL_IT.get(r.entity_type, r.entity_type)
    if _boosted(r):
        return f"{name}+"
    return name
def _box(inner: str) -> str:
    """Wrap *inner* (already-built HTML) in the shared container ``<div>``."""
    return '<div style="{}">{}</div>'.format(_BOX, inner)
def _empty(msg: str) -> str:
    """Return the container with a centred, grey placeholder message.

    *msg* is inserted as-is (not HTML-escaped).
    """
    paragraph = (
        f'<p style="font-family:{_FONT}; color:#6b7280; text-align:center; '
        f'padding:40px 0; margin:0;">{msg}</p>'
    )
    return _box(paragraph)
| # --------------------------------------------------------------------------- | |
| # Tab 1 – Testo evidenziato | |
| # --------------------------------------------------------------------------- | |
def render_highlighted_text(text: str, entities: list[RecognizerResult]) -> str:
    """Render *text* as HTML with each detected entity highlighted in place.

    Every entity becomes a coloured ``<span>`` (colours chosen by the severity
    of its entity type) followed by a small ``<sub>`` badge with its label.
    The span's tooltip shows the score, the detection layer and, when
    present, the cross-layer agreement and boost markers. Text outside the
    entities is HTML-escaped and emitted unchanged.

    Overlapping spans: entities are processed by start offset, longest first
    among those sharing a start. An entity that begins inside a span that has
    already been rendered is skipped, so no part of *text* is ever emitted
    twice.

    Args:
        text: The original input text.
        entities: Detection results whose ``start``/``end`` index into *text*.

    Returns:
        An HTML fragment wrapped in the shared container. A placeholder
        message is returned when *text* is empty, and the plain escaped text
        when there are no entities.
    """
    if not text:
        return _empty("Inserisci del testo per iniziare.")
    if not entities:
        return _box(f'<div style="{_TEXT}">{html.escape(text)}</div>')

    parts: list[str] = []
    cursor = 0
    # Longest span first for equal starts, so the widest match wins an overlap.
    for r in sorted(entities, key=lambda r: (r.start, -r.end)):
        if r.start < cursor:
            # Starts inside text that is already rendered: emitting it would
            # duplicate that text in the output.
            continue
        if r.start > cursor:
            parts.append(html.escape(text[cursor:r.start]))

        original = text[r.start:r.end]
        c = SEVERITY_COLORS[get_severity(r.entity_type)]
        badge = _badge(r)
        boost = _boosted(r)
        an = _agree(r)

        tip = f"Score: {r.score:.2f} · {_src(r)}"
        if an >= 2:
            tip += f" · ⚡ {an} livelli concordano"
        if boost:
            tip += " · ✓ Doppio match regex"

        # Boosted matches get an extra underline in the severity colour.
        border_b = f"border-bottom:2px solid {c['border']};" if boost else ""
        parts.append(
            # The tooltip sits inside an attribute value, so escape it too.
            f'<span title="{html.escape(tip)}" style="background:{c["bg"]}; '
            f'border-left:3px solid {c["border"]}; {border_b}'
            f'padding:1px 6px 1px 5px; border-radius:3px; color:{c["text"]};">'
            f'{html.escape(original)}'
            f'<sub style="font-size:0.65em; margin-left:4px; font-weight:700; '
            f'color:{c["badge"]}; letter-spacing:0.03em;">{html.escape(badge)}</sub></span>'
        )
        # Never move backwards, even if a result has end < start.
        cursor = max(cursor, r.end)

    if cursor < len(text):
        parts.append(html.escape(text[cursor:]))
    return _box(f'<div style="{_TEXT}">{"".join(parts)}</div>')
| # --------------------------------------------------------------------------- | |
| # Tab 2 – Testo anonimizzato | |
| # --------------------------------------------------------------------------- | |
def render_anonymized_text(anon_text: str) -> str:
    """Render the anonymized text as HTML with placeholders shown as chips.

    The text is HTML-escaped first. Every substring matching ``_PH_RE`` (a
    bracketed upper-case placeholder) is then wrapped in a styled ``<span>``.
    An empty input yields the "no result" placeholder box.
    """
    if not anon_text:
        return _empty("Nessun risultato.")

    chip_open = (
        '<span style="background:#f3f4f6; border:1px solid #e5e7eb; '
        'border-radius:4px; padding:1px 6px; font-weight:600; '
        'color:#374151; font-size:0.88em;">'
    )

    def _chip(match: _re.Match) -> str:
        # Wrap one matched placeholder in the chip markup.
        return chip_open + html.escape(match.group()) + "</span>"

    escaped = html.escape(anon_text)
    body = _PH_RE.sub(_chip, escaped)
    return _box(f'<div style="{_TEXT}">{body}</div>')
| # --------------------------------------------------------------------------- | |
| # Tab 3 – Report categorizzato | |
| # --------------------------------------------------------------------------- | |
def _card(sev: str, count: int) -> str:
    """Build one summary card showing the severity label and its entity count.

    Colours come from ``SEVERITY_COLORS[sev]`` and the caption text from
    ``SEVERITY_LABELS[sev]``.
    """
    palette = SEVERITY_COLORS[sev]
    wrapper_open = (
        f'<div style="flex:1; padding:14px 16px; background:{palette["bg"]}; '
        f'border:1px solid {palette["border"]}; border-radius:8px;">'
    )
    caption = (
        f'<div style="font-size:0.7em; color:{palette["badge"]}; font-weight:700; '
        f'text-transform:uppercase; letter-spacing:0.06em;">Gravità {SEVERITY_LABELS[sev]}</div>'
    )
    figure = (
        f'<div style="font-size:1.9em; color:{palette["text"]}; font-weight:700; '
        f'line-height:1.1; margin-top:4px;">{count}</div>'
    )
    return f"{wrapper_open}{caption}{figure}</div>"
def render_categorized_report(text: str, entities: list[RecognizerResult]) -> str:
    """Render the report tab: summary cards plus entities grouped by severity.

    Layout of the returned HTML (wrapped in the shared container):
      1. one summary card per severity in ``SEVERITY_ORDER`` with its count;
      2. for each severity that has entities, a heading followed by one list
         per entity type, each item showing the matched text, its badge,
         score, detection layer and optional boost / agreement tags.

    Args:
        text: The original input text; entity offsets index into it.
        entities: Detection results to report.

    Returns:
        The HTML fragment, or a placeholder box when *entities* is empty.
    """
    if not entities:
        return _empty("Nessuna entità rilevata sopra lo score minimo.")

    # Bucket results by severity, keeping SEVERITY_ORDER as the display order.
    grouped: dict[str, list[RecognizerResult]] = {level: [] for level in SEVERITY_ORDER}
    for entity in entities:
        grouped[get_severity(entity.entity_type)].append(entity)

    boost_markup = (
        '<span style="font-size:0.72em; color:#047857; font-weight:700; '
        'margin-left:6px; background:#d1fae5; padding:1px 5px; border-radius:3px;">✓ boost</span>'
    )

    def _row(hit: RecognizerResult, palette: dict) -> str:
        # Build the <li> for a single detection.
        snippet = text[hit.start:hit.end]
        boost_part = boost_markup if _boosted(hit) else ""
        layers = _agree(hit)
        if layers >= 2:
            agree_part = (
                f'<span style="font-size:0.72em; color:#6366f1; font-weight:700; '
                f'margin-left:6px; background:#e0e7ff; padding:1px 5px; border-radius:3px;" '
                f'title="Rilevato da {layers} livelli">⚡ x{layers}</span>'
            )
        else:
            agree_part = ""
        return (
            f'<li style="margin:4px 0; color:#111827; font-size:0.9em;">'
            f'<code style="background:{palette["bg"]}; padding:1px 6px; border-radius:3px; '
            f'color:#111827; font-size:0.95em;">{html.escape(snippet)}</code>'
            f'<span style="color:#6b7280; font-size:0.85em; margin-left:8px;">'
            f'[{html.escape(_badge(hit))}] score {hit.score:.2f} · {_src(hit)}</span>'
            f'{boost_part}{agree_part}</li>'
        )

    chunks: list[str] = [
        f'<div style="font-family:{_FONT}; color:#111827;">',
        '<div style="display:flex; gap:12px; margin-bottom:24px;">',
    ]
    chunks.extend(_card(level, len(grouped[level])) for level in SEVERITY_ORDER)
    chunks.append('</div>')

    for level in SEVERITY_ORDER:
        members = grouped[level]
        if not members:
            continue
        palette = SEVERITY_COLORS[level]
        chunks.append(
            f'<h3 style="color:{palette["badge"]}; border-bottom:1px solid {palette["border"]}; '
            f'padding-bottom:6px; margin:24px 0 12px 0; font-size:0.88em; font-weight:700; '
            f'text-transform:uppercase; letter-spacing:0.05em;">'
            f'Gravità {SEVERITY_LABELS[level]} '
            f'<span style="font-weight:500; color:#6b7280; text-transform:none; '
            f'letter-spacing:normal; font-size:0.95em;">· {len(members)} occorrenze</span></h3>'
        )

        # Sub-group by entity type, in order of first appearance.
        per_type: dict[str, list[RecognizerResult]] = {}
        for entity in members:
            per_type.setdefault(entity.entity_type, []).append(entity)

        for kind, hits in per_type.items():
            title = LABEL_IT.get(kind, kind)
            chunks.append(
                f'<div style="margin:14px 0 0 4px;">'
                f'<strong style="color:{palette["badge"]}; font-size:0.95em;">{html.escape(title)}</strong>'
                f'<span style="color:#6b7280; font-size:0.85em; margin-left:8px;">{len(hits)} occ.</span>'
                f'<ul style="margin:6px 0 0 0; padding-left:14px; list-style:none; border-left:2px solid {palette["border"]};">'
            )
            chunks.extend(_row(hit, palette) for hit in hits)
            chunks.append('</ul></div>')

    chunks.append('</div>')
    return _box("".join(chunks))