# AiAnonymize / app.py
# TomassiniDigital's picture
# Upload 10 files
# a499b83 verified
"""Anonimizzatore Gare d'Appalto — UI Gradio."""
import gradio as gr
from config import MODES, DEFAULT_MIN_SCORE, SEVERITY_COLORS, SEVERITY_LABELS, SEVERITY_ORDER
from pipeline import anonymize
from renderers import render_highlighted_text, render_anonymized_text, render_categorized_report
from utils import extract_pdf_text
from demo_text import DEMO_TEXT
# ---------------------------------------------------------------------------
# Handlers
# ---------------------------------------------------------------------------
def _process(text: str, mode: str, min_score: float, use_regex: bool):
    """Run the anonymization pipeline on *text* and render the three result views.

    Args:
        text: Raw document text to anonymize.
        mode: Obfuscation mode, forwarded unchanged to ``anonymize``.
        min_score: Minimum confidence score, forwarded to ``anonymize``.
        use_regex: Regex-level toggle, forwarded to ``anonymize``.

    Returns:
        A 3-tuple ``(highlighted, anonymized, report)`` destined for the three
        HTML output components. When *text* is empty, ``None`` or only
        whitespace, the same placeholder markup is returned in all three slots
        and the pipeline is not invoked.
    """
    if not (text and text.strip()):
        placeholder = '<div style="padding:40px; text-align:center; color:#6b7280; font-family:Arial,sans-serif;">Inserisci del testo per iniziare.</div>'
        return (placeholder,) * 3

    anon_text, entities = anonymize(text, mode, min_score, use_regex)

    # Rendered in the same order as the output components are wired.
    highlighted_view = render_highlighted_text(text, entities)
    anonymized_view = render_anonymized_text(anon_text)
    report_view = render_categorized_report(text, entities)
    return highlighted_view, anonymized_view, report_view
def handle_text(text, mode, min_score, use_regex):
    """Click handler for the text tab: delegate straight to ``_process``.

    All four arguments are passed through unchanged and the 3-tuple produced
    by ``_process`` is returned as-is.
    """
    rendered_views = _process(text, mode, min_score, use_regex)
    return rendered_views
def handle_pdf(pdf_file, mode, min_score, use_regex):
    """Click handler for the PDF tab: extract the text, then anonymize it.

    Args:
        pdf_file: Value of the file-upload component, or ``None`` when nothing
            has been uploaded.
        mode, min_score, use_regex: Forwarded unchanged to ``_process``.

    Returns:
        The 3-tuple produced by ``_process`` for the extracted text, or a
        3-tuple repeating one notice when no file was supplied or the
        extracted text is blank.
    """
    def _notice(msg):
        # The same markup fills all three output slots.
        html = f'<div style="padding:40px; text-align:center; color:#6b7280; font-family:Arial,sans-serif;">{msg}</div>'
        return (html,) * 3

    if pdf_file is not None:
        raw = extract_pdf_text(pdf_file)
        if raw.strip():
            return _process(raw, mode, min_score, use_regex)
        return _notice("⚠️ Impossibile estrarre testo dal PDF (scansionato?).")

    return _notice("Carica un PDF per iniziare.")
# ---------------------------------------------------------------------------
# Legenda gravità
# ---------------------------------------------------------------------------
def _legend_html() -> str:
    """Build the severity legend as an HTML snippet.

    One badge is emitted per entry of ``SEVERITY_ORDER``, in that order. Each
    badge takes its colors from ``SEVERITY_COLORS`` and its caption from
    ``SEVERITY_LABELS``. The badges are wrapped in a single flex container.
    """
    def _badge(level) -> str:
        palette = SEVERITY_COLORS[level]
        return (
            f'<span style="background:{palette["bg"]}; border-left:3px solid {palette["border"]}; '
            f'padding:3px 10px; border-radius:4px; color:{palette["text"]}; '
            f'font-size:0.85em; font-weight:500;">Gravità {SEVERITY_LABELS[level]}</span>'
        )

    container_open = (
        '<div style="display:flex; gap:8px; flex-wrap:wrap; margin:8px 0; '
        'font-family:Arial,Helvetica,sans-serif;">'
    )
    body = "".join(_badge(level) for level in SEVERITY_ORDER)
    return container_open + body + '</div>'
# ---------------------------------------------------------------------------
# CSS
# ---------------------------------------------------------------------------
# Custom stylesheet passed to gr.Blocks(css=...): caps the container width at
# 1200px and centers it, forces an Arial/Helvetica font stack on every element,
# and styles the header title, the ".section-label" headings and primary buttons.
_CSS = """
.gradio-container {
max-width: 1200px !important;
margin: 0 auto !important;
font-family: Arial, Helvetica, sans-serif !important;
}
.gradio-container * { font-family: Arial, Helvetica, sans-serif !important; }
.header-block h1 { margin-bottom: 4px !important; font-weight: 700; }
.section-label {
font-size: 0.78em !important; font-weight: 700 !important;
text-transform: uppercase; letter-spacing: 0.06em;
color: #6b7280 !important; margin-bottom: 6px !important;
}
button.primary { font-weight: 600 !important; }
"""
# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
# Mode names offered in the radio group; computed once and reused for both the
# choices and the default selection (the first mode).
_mode_choices = list(MODES.keys())

with gr.Blocks(
    title="Anonimizzatore Gare d'Appalto",
    theme=gr.themes.Soft(
        primary_hue="blue",
        neutral_hue="slate",
        font=["Arial", "Helvetica", "sans-serif"],
    ),
    css=_CSS,
) as demo:
    # --- Header -----------------------------------------------------------
    with gr.Row(elem_classes=["header-block"]):
        gr.Markdown(
            "# 🔒 Anonimizzatore Gare d'Appalto\n"
            "Pipeline a 3 livelli: **Regex** → **NER** (Italian_NER_XXL_v2) → "
            "**GLiNER** (PII_ITA zero-shot), con doppio passaggio regex finale."
        )

    # --- Settings shared by both input tabs ---------------------------------
    gr.Markdown("### Impostazioni", elem_classes=["section-label"])
    mode_radio = gr.Radio(
        choices=_mode_choices,
        value=_mode_choices[0],
        label="Modalità di offuscamento",
    )
    with gr.Row(equal_height=True):
        score_slider = gr.Slider(
            minimum=0.0, maximum=1.0, value=DEFAULT_MIN_SCORE, step=0.05,
            label="Score minimo di confidenza", scale=3,
        )
        use_regex_chk = gr.Checkbox(
            value=True, label="Livello 1: regex appalti", scale=1,
        )

    # --- Input: pasted text or uploaded PDF ---------------------------------
    gr.Markdown("### Input", elem_classes=["section-label"])
    with gr.Tabs():
        with gr.TabItem("📝 Testo"):
            txt_in = gr.Textbox(
                label=None, lines=10,
                placeholder="Incolla il testo del documento di gara…",
                show_label=False,
            )
            with gr.Row():
                txt_btn = gr.Button("Anonimizza", variant="primary", size="lg", scale=3)
                demo_btn = gr.Button("📋 Carica esempio", variant="secondary", size="lg", scale=1)
        with gr.TabItem("📄 PDF"):
            pdf_in = gr.File(label="Carica un PDF", file_types=[".pdf"])
            pdf_btn = gr.Button("Anonimizza", variant="primary", size="lg")

    # Static severity legend, rendered once when the UI is built.
    gr.HTML(_legend_html())

    # --- Output: one HTML pane per rendered view ----------------------------
    gr.Markdown("### Risultato", elem_classes=["section-label"])
    with gr.Tabs():
        with gr.TabItem("🎨 Evidenziato"):
            out_highlighted = gr.HTML()
        with gr.TabItem("🔒 Anonimizzato"):
            out_anonymized = gr.HTML()
        with gr.TabItem("📊 Report"):
            out_report = gr.HTML()

    # --- Event wiring ---------------------------------------------------------
    # Both handlers return a 3-tuple that maps positionally onto these outputs.
    outputs = [out_highlighted, out_anonymized, out_report]
    inputs_common = [mode_radio, score_slider, use_regex_chk]
    txt_btn.click(handle_text, inputs=[txt_in, *inputs_common], outputs=outputs)
    pdf_btn.click(handle_pdf, inputs=[pdf_in, *inputs_common], outputs=outputs)
    # The example button only fills the textbox; it does not run the pipeline.
    demo_btn.click(lambda: DEMO_TEXT, inputs=None, outputs=txt_in)

# Start the server only when executed as a script, so that importing this
# module (tests, tooling) builds `demo` without launching it.
if __name__ == "__main__":
    demo.launch()