"""Redac — a local privacy gateway. Two entry points, one redaction core: - Text: paste confidential text -> detect PII -> reversibly redact. - Image: upload a document/ID image -> a local vision model extracts the fields -> the extracted text is redacted the same way. The redacted text is what you would safely hand to a downstream LLM; the mapping stays local so any answer can be rehydrated. All compute runs in-Space (vision model on ZeroGPU). No external API calls. """ import gradio as gr from redac import ( detect_entities, redact, rehydrate, extract_text_from_image, DEFAULT_LABELS, ) from redac.detect import gliner_available import inspect # show_copy_button exists in Gradio 5.x; in 6.x copying is the default and the # kwarg was removed. Pass it only when supported so the app runs on both. _COPY = ( {"show_copy_button": True} if "show_copy_button" in inspect.signature(gr.Textbox.__init__).parameters else {} ) _NER_NOTE = ( "" if gliner_available() else "\n\n> ⚠️ GLiNER not installed: running **regex-only** (structured " "IDs like email/phone/IBAN). Names, addresses and other free-text PII " "need GLiNER, which is available on the Space." ) EXAMPLE = ( "Patient John A. Doe, DOB 1985-04-12, was admitted on 2026-06-01. " "Contact: john.doe@example.com, +49 151 23456789. " "Insurance ID 123-45-6789, IBAN DE89370400440532013000." ) def _redact_text(text, labels, threshold): entities = detect_entities(text, labels=labels or DEFAULT_LABELS, threshold=threshold) redacted, mapping = redact(text, entities) table = [[e.label, e.text, f"{e.score:.2f}", e.source] for e in entities] summary = f"{len(entities)} PII span(s) redacted into {len(mapping)} placeholder(s)." return redacted, table, summary, mapping def run_text(text, labels, threshold): return _redact_text(text, labels, threshold) def run_image(image, labels, threshold): extracted = extract_text_from_image(image) if not extracted: return "", "", [], "No image / nothing extracted.", {} redacted, table, summary, mapping = _redact_text(extracted, labels, threshold) return extracted, redacted, table, summary, mapping def do_rehydrate(redacted_text, mapping): if not mapping: return "Run a redaction first." return rehydrate(redacted_text, mapping) with gr.Blocks(title="Redac", theme=gr.themes.Soft()) as demo: gr.Markdown( "# 🖍️ Redac\n" "**A local privacy gateway.** Upload a document or paste text, Redac " "extracts and redacts the PII *locally*, and you copy the safe output to " "use with any LLM. Raw values never leave: the mapping stays local so you " "can rehydrate answers yourself." + _NER_NOTE ) with gr.Tabs(): # --- Text tab -------------------------------------------------------- with gr.Tab("Text"): t_map = gr.State({}) with gr.Row(): with gr.Column(): t_in = gr.Textbox( label="Confidential text", placeholder="Paste a document, message, or record...", lines=10, value=EXAMPLE, ) t_labels = gr.Dropdown( label="PII types", choices=DEFAULT_LABELS, value=DEFAULT_LABELS, multiselect=True, ) t_thr = gr.Slider(0.1, 0.9, value=0.45, step=0.05, label="Threshold") t_btn = gr.Button("Redact", variant="primary") with gr.Column(): t_out = gr.Textbox( label="✅ Safe output — copy this into your LLM (no raw PII)", lines=10, **_COPY, ) t_sum = gr.Markdown() t_tab = gr.Dataframe( headers=["type", "value", "score", "source"], label="Detected PII", wrap=True, ) with gr.Accordion("Rehydrate (local only)", open=False): t_reh_btn = gr.Button("Restore original values") t_reh = gr.Textbox(label="Rehydrated", lines=6) t_btn.click(run_text, [t_in, t_labels, t_thr], [t_out, t_tab, t_sum, t_map]) t_reh_btn.click(do_rehydrate, [t_out, t_map], [t_reh]) # --- Image tab ------------------------------------------------------- with gr.Tab("Image"): gr.Markdown( "Upload a document or ID image. A local vision model " "(MiniCPM-V-4.5) extracts the fields, then Redac redacts them." ) i_map = gr.State({}) with gr.Row(): with gr.Column(): i_in = gr.Image(label="Document / ID image", type="pil") i_labels = gr.Dropdown( label="PII types", choices=DEFAULT_LABELS, value=DEFAULT_LABELS, multiselect=True, ) i_thr = gr.Slider(0.1, 0.9, value=0.45, step=0.05, label="Threshold") i_btn = gr.Button("Extract & redact", variant="primary") with gr.Column(): i_extracted = gr.Textbox( label="🔒 Extracted fields (raw — stays local, never sent)", lines=8, ) i_out = gr.Textbox( label="✅ Safe output — copy this into your LLM (no raw PII)", lines=8, **_COPY, ) i_sum = gr.Markdown() i_tab = gr.Dataframe( headers=["type", "value", "score", "source"], label="Detected PII", wrap=True, ) with gr.Accordion("Rehydrate (local only)", open=False): i_reh_btn = gr.Button("Restore original values") i_reh = gr.Textbox(label="Rehydrated", lines=6) i_btn.click( run_image, [i_in, i_labels, i_thr], [i_extracted, i_out, i_tab, i_sum, i_map], ) i_reh_btn.click(do_rehydrate, [i_out, i_map], [i_reh]) if __name__ == "__main__": import os port = int(os.environ.get("GRADIO_SERVER_PORT", "7860")) demo.launch(server_name="0.0.0.0", server_port=port)