"""Anonimizzatore Gare d'Appalto — UI Gradio.""" import json import gradio as gr from config import ( MODES, DEFAULT_MIN_SCORE, SEVERITY_ORDER, SEVERITY_LABELS, SEVERITY_COLORS, SEVERITY_CRITICAL, SEVERITY_HIGH, SEVERITY_MEDIUM, SEVERITY_LOW, get_severity, ) from core.pipeline import detect, anonymize_from_entities, apply_custom_lines from ui.renderers import ( render_highlighted_text, render_anonymized_text, render_categorized_report, build_json_by_severity, ) from utils import extract_pdf_text, extract_pdf_text_got_ocr, extract_pdf_text_smoldocling from demo_text import DEMO_TEXT, DEMO_REGEX, DEMO_GLINER, DEMO_NER _DEMO_TEXTS = { "Bando completo": DEMO_TEXT, "Layer 0 — Regex": DEMO_REGEX, "Layer 1 — NER": DEMO_NER, "Layer 2 — GLiNER": DEMO_GLINER, } _PDF_EXTRACTORS = { "PyMuPDF (default)": extract_pdf_text, "GOT-OCR 2.0 (~30s/pag, CPU)": extract_pdf_text_got_ocr, "SmolDocling 256M (CPU)": extract_pdf_text_smoldocling, } _STATE_DEFAULT = {"orig_text": "", "mode": "", "anon_text": "", "entities": []} _ALL_SEVERITIES = list(SEVERITY_ORDER) _EMPTY_JSON = "{}" # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _active_set(active_severities: list[str]) -> set[str]: return set(active_severities) if active_severities is not None else set(_ALL_SEVERITIES) # --------------------------------------------------------------------------- # Handlers # --------------------------------------------------------------------------- def _process(text: str, mode: str, min_score: float, use_regex: bool, active_severities: list[str]): """Esegue il pipeline completo e restituisce i 4 output Gradio. Le entità rilevate vengono salvate nello stato in modo che variazioni successive di modalità o gravità possano ricalcolare l'output senza rieseguire il costoso step di rilevamento NLP. """ active = _active_set(active_severities) entities = detect(text, min_score=min_score, use_regex=use_regex) filtered = [e for e in entities if get_severity(e.entity_type) in active] anon_text = anonymize_from_entities(text, filtered, mode) new_state = {"orig_text": text, "mode": mode, "anon_text": anon_text, "entities": entities} json_c, json_h, json_m, json_l = build_json_by_severity(text, entities, active) return ( render_highlighted_text(text, entities, active), render_anonymized_text(anon_text), render_categorized_report(text, entities, active), new_state, json_c, json_h, json_m, json_l, ) def handle_settings_change(state_data: dict, mode: str, c: bool, h: bool, m: bool, l: bool): """Ricalcola l'output quando l'utente cambia modalità o livelli di gravità. Non riesegue il rilevamento NLP: usa le entità già salvate nello stato. """ entities = state_data.get("entities") or [] orig_text = state_data.get("orig_text") or "" if not orig_text or not entities: return (gr.update(), gr.update(), gr.update(), state_data, gr.update(), gr.update(), gr.update(), gr.update()) active = _active_set([s for s, on in zip(SEVERITY_ORDER, [c, h, m, l]) if on]) filtered = [e for e in entities if get_severity(e.entity_type) in active] anon_text = anonymize_from_entities(orig_text, filtered, mode) new_state = {**state_data, "mode": mode, "anon_text": anon_text} json_c, json_h, json_m, json_l = build_json_by_severity(orig_text, entities, active) return ( render_highlighted_text(orig_text, entities, active), render_anonymized_text(anon_text), render_categorized_report(orig_text, entities, active), new_state, json_c, json_h, json_m, json_l, ) def handle_text(text, mode, min_score, use_regex, active_severities): _empty = ( '
Inserisci del testo per iniziare.
' ) if not text or not text.strip(): return (_empty, _empty, _empty, _STATE_DEFAULT.copy(), _EMPTY_JSON, _EMPTY_JSON, _EMPTY_JSON, _EMPTY_JSON) return _process(text, mode, min_score, use_regex, active_severities) def handle_pdf(pdf_file, mode, min_score, use_regex, active_severities, extractor_label="PyMuPDF (default)"): def _err(msg): e = ( f'
{msg}
' ) return (e, e, e, _STATE_DEFAULT.copy(), _EMPTY_JSON, _EMPTY_JSON, _EMPTY_JSON, _EMPTY_JSON) if pdf_file is None: return _err("Carica un PDF per iniziare.") extractor = _PDF_EXTRACTORS.get(extractor_label, extract_pdf_text) raw = extractor(pdf_file) if not raw.strip(): return _err("⚠️ Impossibile estrarre testo dal PDF (scansionato?).") return _process(raw, mode, min_score, use_regex, active_severities) def handle_update_custom(selected_lines_json: str, state_data: dict): """Applica le righe selezionate (via JS al click) al testo anonimizzato.""" lines: list[str] = json.loads(selected_lines_json or "[]") if not lines or not state_data.get("orig_text"): return render_anonymized_text(state_data.get("anon_text", "")) new_anon = apply_custom_lines( state_data["anon_text"], state_data["orig_text"], lines, state_data["mode"], ) return render_anonymized_text(new_anon) # --------------------------------------------------------------------------- # JavaScript # --------------------------------------------------------------------------- _PAGE_JS = """ (function () { /* ── Overlay fullscreen di loading ─────────────────────────────────── */ var ov = document.createElement('div'); ov.id = 'anon-fs-overlay'; ov.innerHTML = [ '
', ' ', ' ', ' ', ' ', '

Analisi in corso…

', '
', '', ].join(''); document.body.appendChild(ov); window._anonHideOverlay = function () { clearTimeout(window._anonOverlayTimer); ov.style.display = 'none'; }; function showOverlay() { clearTimeout(window._anonOverlayTimer); ov.style.display = 'flex'; /* Fallback: si nasconde automaticamente dopo 50 secondi */ window._anonOverlayTimer = setTimeout(window._anonHideOverlay, 50000); } /* Mostra overlay al click sui pulsanti "Anonimizza" */ document.addEventListener('click', function (e) { var btn = e.target.closest('button'); if (btn && btn.classList.contains('primary') && (btn.textContent || '').trim() === 'Anonimizza') { showOverlay(); } }, true); /* ── Checkbox righe custom ──────────────────────────────────────────── */ document.addEventListener('change', function (e) { if (!e.target || !e.target.classList.contains('anon-line-cb')) return; var hasChecked = document.querySelectorAll('.anon-line-cb:checked').length > 0; var w = document.getElementById('anon-custom-warning'); if (w) w.style.display = hasChecked ? 'block' : 'none'; }, true); })(); """ # JS eseguito da .then() — nasconde l'overlay quando il processing Python termina _HIDE_OVERLAY_JS = "() => { if (window._anonHideOverlay) window._anonHideOverlay(); }" _COLLECT_JS = """ (placeholder, state) => { var r = []; document.querySelectorAll('.anon-line-cb:checked').forEach(function (cb) { var row = cb.closest('.anon-line-row'); if (row) r.push(row.getAttribute('data-line')); }); return [JSON.stringify(r), state]; } """ # --------------------------------------------------------------------------- # CSS # --------------------------------------------------------------------------- _SEV_CSS = "" for _s in SEVERITY_ORDER: _c = SEVERITY_COLORS[_s] # Selettori label: background, bordo, padding _SEV_CSS += ( f'.sev-{_s} .wrap label, .sev-{_s} label {{' f'background:{_c["bg"]} !important; ' f'border-left:3px solid {_c["border"]} !important; ' f'border-radius:6px !important; ' f'padding:4px 12px !important;}}\n' ) # .label-text è lo span interno dove Gradio/Svelte mette il testo del checkbox _SEV_CSS += ( f'.sev-{_s} .label-text, .sev-{_s} .wrap .label-text {{' f'color:{_c["text"]} !important; ' f'font-size:0.85em !important; font-weight:500 !important;}}\n' ) _CSS = """ .gradio-container { max-width: 1200px !important; margin: 0 auto !important; font-family: Arial, Helvetica, sans-serif !important; } .gradio-container * { font-family: Arial, Helvetica, sans-serif !important; } .header-block h1 { margin-bottom: 4px !important; font-weight: 700; } .section-label { font-size: 0.78em !important; font-weight: 700 !important; text-transform: uppercase; letter-spacing: 0.06em; color: #6b7280 !important; margin-bottom: 6px !important; } button.primary { font-weight: 600 !important; } /* Forza il colore del testo nei box HTML di output (Evidenziato / Anonimizzato / Report). Gradio sovrascrive il color dei figli con regole scoped — questo lo corregge. */ .output-html, .output-html *:not([style*="color"]) { color: #111827 !important; } """ + _SEV_CSS # --------------------------------------------------------------------------- # UI # --------------------------------------------------------------------------- with gr.Blocks( title="Anonimizzatore Gare d'Appalto", theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate", font=["Arial", "Helvetica", "sans-serif"]), css=_CSS, js=_PAGE_JS, ) as demo: state = gr.State(_STATE_DEFAULT.copy()) _lines_placeholder = gr.Textbox(value="[]", visible=False) with gr.Row(elem_classes=["header-block"]): gr.Markdown( "# 🔒 Anonimizzatore Gare d'Appalto\n" "Pipeline a 3 livelli: **Regex** → **NER** (Italian_NER_XXL_v2) → " "**GLiNER** (PII_ITA zero-shot), con doppio passaggio regex finale." ) gr.Markdown("### Impostazioni", elem_classes=["section-label"]) mode_radio = gr.Radio( choices=list(MODES.keys()), value=list(MODES.keys())[0], label="Modalità di offuscamento", ) with gr.Row(equal_height=True): score_slider = gr.Slider( minimum=0.0, maximum=1.0, value=DEFAULT_MIN_SCORE, step=0.05, label="Score minimo di confidenza", scale=3, ) use_regex_chk = gr.Checkbox( value=True, label="Livello 1: regex appalti", scale=1, ) # Severity toggle — checkbox per livello; di default tutti attivi with gr.Row(): sev_checks = [] for sev in SEVERITY_ORDER: cb = gr.Checkbox( value=True, label=f"Gravità {SEVERITY_LABELS[sev]}", elem_classes=[f"sev-{sev}"], scale=1, ) sev_checks.append(cb) gr.Markdown("### Input", elem_classes=["section-label"]) with gr.Tabs(): with gr.TabItem("📝 Testo"): txt_in = gr.Textbox( label=None, lines=10, placeholder="Incolla il testo del documento di gara…", show_label=False, ) with gr.Row(): txt_btn = gr.Button("Anonimizza", variant="primary", size="lg", scale=2) demo_sel = gr.Dropdown( choices=list(_DEMO_TEXTS.keys()), value=list(_DEMO_TEXTS.keys())[0], label=None, show_label=False, scale=1, ) demo_btn = gr.Button("📋 Carica", variant="secondary", size="lg", scale=1) with gr.TabItem("📄 PDF"): pdf_in = gr.File(label="Carica un PDF", file_types=[".pdf"]) pdf_extractor_radio = gr.Radio( choices=list(_PDF_EXTRACTORS.keys()), value=list(_PDF_EXTRACTORS.keys())[0], label="Estrattore PDF", ) pdf_btn = gr.Button("Anonimizza", variant="primary", size="lg") gr.Markdown("### Risultato", elem_classes=["section-label"]) with gr.Tabs(): with gr.TabItem("🎨 Evidenziato"): out_highlighted = gr.HTML(elem_classes=["output-html"]) with gr.TabItem("🔒 Anonimizzato"): update_anon_btn = gr.Button( "🔄 Applica anonimizzazione custom", variant="secondary", size="sm", ) out_anonymized = gr.HTML(elem_classes=["output-html"]) with gr.TabItem("📊 Report"): out_report = gr.HTML(elem_classes=["output-html"]) with gr.TabItem("🔍 JSON"): with gr.Tabs(): with gr.TabItem("💥 Critica"): out_json_c = gr.Code(language="json", interactive=False, show_label=False) with gr.TabItem("🔴 Alta"): out_json_h = gr.Code(language="json", interactive=False, show_label=False) with gr.TabItem("🟡 Media"): out_json_m = gr.Code(language="json", interactive=False, show_label=False) with gr.TabItem("🔵 Bassa"): out_json_l = gr.Code(language="json", interactive=False, show_label=False) outputs_all = [out_highlighted, out_anonymized, out_report, state, out_json_c, out_json_h, out_json_m, out_json_l] inputs_common = [mode_radio, score_slider, use_regex_chk, *sev_checks] def _with_sev(handler): def _wrapped(inp, mode, min_score, use_regex, c, h, m, l): active = [s for s, on in zip(SEVERITY_ORDER, [c, h, m, l]) if on] return handler(inp, mode, min_score, use_regex, active) return _wrapped def _with_sev_pdf(handler): def _wrapped(inp, mode, min_score, use_regex, c, h, m, l, extractor_label): active = [s for s, on in zip(SEVERITY_ORDER, [c, h, m, l]) if on] return handler(inp, mode, min_score, use_regex, active, extractor_label) return _wrapped (txt_btn.click(_with_sev(handle_text), inputs=[txt_in, *inputs_common], outputs=outputs_all) .then(fn=None, js=_HIDE_OVERLAY_JS)) (pdf_btn.click(_with_sev_pdf(handle_pdf), inputs=[pdf_in, *inputs_common, pdf_extractor_radio], outputs=outputs_all) .then(fn=None, js=_HIDE_OVERLAY_JS)) demo_btn.click(lambda sel: _DEMO_TEXTS.get(sel, DEMO_TEXT), inputs=[demo_sel], outputs=txt_in) # Ricalcola output quando l'utente cambia modalità o gravità post-analisi. # Non riesegue il rilevamento NLP: riusa le entità salvate nello stato. _settings_inputs = [state, mode_radio, *sev_checks] mode_radio.change(handle_settings_change, inputs=_settings_inputs, outputs=outputs_all) for _cb in sev_checks: _cb.change(handle_settings_change, inputs=_settings_inputs, outputs=outputs_all) update_anon_btn.click( fn=handle_update_custom, inputs=[_lines_placeholder, state], outputs=[out_anonymized], js=_COLLECT_JS, ) demo.launch()