Redac / app.py
barath19's picture
feat: frame output for copy-to-LLM use with one-click copy button
062f9b8
Raw
History Blame Contribute Delete
6.63 kB
"""Redac — a local privacy gateway.
Two entry points, one redaction core:
- Text: paste confidential text -> detect PII -> reversibly redact.
- Image: upload a document/ID image -> a local vision model extracts the
fields -> the extracted text is redacted the same way.
The redacted text is what you would safely hand to a downstream LLM; the
mapping stays local so any answer can be rehydrated. All compute runs
in-Space (vision model on ZeroGPU). No external API calls.
"""
import gradio as gr
from redac import (
detect_entities,
redact,
rehydrate,
extract_text_from_image,
DEFAULT_LABELS,
)
from redac.detect import gliner_available
import inspect
# Gradio changed how a Textbox copy button is requested between major
# versions: 5.x accepts `show_copy_button=True`, while 6.x removed that kwarg.
# Inspect the installed signature and pass whichever spelling it accepts so
# the "safe output" boxes keep their one-click copy button on both versions.
# NOTE(review): the 6.x branch assumes Textbox takes `buttons=["copy"]`;
# confirm against the Gradio version pinned for the Space.
def _copy_button_kwargs(component_init):
    """Return the kwargs that enable a copy button for a component.

    Args:
        component_init: The component's ``__init__`` callable whose signature
            is inspected.

    Returns:
        ``{"show_copy_button": True}`` when that parameter exists,
        ``{"buttons": ["copy"]}`` when only ``buttons`` exists, otherwise an
        empty dict so the component is built with its defaults.
    """
    params = inspect.signature(component_init).parameters
    if "show_copy_button" in params:
        return {"show_copy_button": True}
    if "buttons" in params:
        return {"buttons": ["copy"]}
    return {}


# Splatted into the safe-output Textbox constructors below (`**_COPY`).
_COPY = _copy_button_kwargs(gr.Textbox.__init__)
# Markdown suffix appended to the page intro. It is empty when GLiNER is
# available; otherwise it warns that only regex-based detection is active.
if gliner_available():
    _NER_NOTE = ""
else:
    _NER_NOTE = (
        "\n\n> ⚠️ GLiNER not installed: running **regex-only** (structured "
        "IDs like email/phone/IBAN). Names, addresses and other free-text PII "
        "need GLiNER, which is available on the Space."
    )
# Sample record pre-filled into the "Confidential text" box of the Text tab.
EXAMPLE = " ".join(
    (
        "Patient John A. Doe, DOB 1985-04-12, was admitted on 2026-06-01.",
        "Contact: john.doe@example.com, +49 151 23456789.",
        "Insurance ID 123-45-6789, IBAN DE89370400440532013000.",
    )
)
def _redact_text(text, labels, threshold):
    """Detect PII in *text* and redact it; shared by the text and image tabs.

    Args:
        text: The text to scan and redact.
        labels: PII labels to detect; a falsy value falls back to
            ``DEFAULT_LABELS``.
        threshold: Passed through to ``detect_entities`` as ``threshold``.

    Returns:
        A 4-tuple ``(redacted, table, summary, mapping)``: the redacted text
        and mapping as returned by ``redact``, one table row
        ``[label, text, score, source]`` per detected entity (score formatted
        to two decimals), and a one-line summary string.
    """
    active_labels = labels if labels else DEFAULT_LABELS
    found = detect_entities(text, labels=active_labels, threshold=threshold)
    safe_text, placeholder_map = redact(text, found)

    rows = []
    for ent in found:
        rows.append([ent.label, ent.text, f"{ent.score:.2f}", ent.source])

    message = (
        f"{len(found)} PII span(s) redacted into "
        f"{len(placeholder_map)} placeholder(s)."
    )
    return safe_text, rows, message, placeholder_map
def run_text(text, labels, threshold):
    """Click handler for the Text tab: redact the pasted text.

    Delegates to ``_redact_text`` and returns its
    ``(redacted, table, summary, mapping)`` tuple unchanged.
    """
    outputs = _redact_text(text, labels, threshold)
    return outputs
def run_image(image, labels, threshold):
    """Click handler for the Image tab: extract text, then redact it.

    Args:
        image: The uploaded image, handed to ``extract_text_from_image``.
        labels: PII labels to detect (see ``_redact_text``).
        threshold: Detection threshold (see ``_redact_text``).

    Returns:
        A 5-tuple ``(extracted, redacted, table, summary, mapping)``. When the
        extraction result is falsy, empty placeholders are returned together
        with the message ``"No image / nothing extracted."``.
    """
    raw_fields = extract_text_from_image(image)
    if raw_fields:
        safe_text, rows, message, placeholder_map = _redact_text(
            raw_fields, labels, threshold
        )
        return raw_fields, safe_text, rows, message, placeholder_map
    # Nothing to redact: blank every output and clear the stored mapping.
    return "", "", [], "No image / nothing extracted.", {}
def do_rehydrate(redacted_text, mapping):
    """Restore original values in *redacted_text* using *mapping*.

    Returns the hint ``"Run a redaction first."`` when *mapping* is falsy
    (no redaction has populated it yet); otherwise returns the result of
    ``rehydrate(redacted_text, mapping)``.
    """
    return rehydrate(redacted_text, mapping) if mapping else "Run a redaction first."
# UI definition. Two tabs (Text / Image) share the same redaction core; each
# tab keeps its own placeholder mapping in a gr.State so rehydration uses the
# mapping produced by that tab's last redaction.
# NOTE(review): the nesting below was reconstructed from a flattened source;
# confirm whether each "Rehydrate" accordion belongs under the Row (as here)
# or inside the right-hand Column.
with gr.Blocks(title="Redac", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# 🖍️ Redac\n"
        "**A local privacy gateway.** Upload a document or paste text, Redac "
        "extracts and redacts the PII *locally*, and you copy the safe output to "
        "use with any LLM. Raw values never leave: the mapping stays local so you "
        "can rehydrate answers yourself." + _NER_NOTE
    )
    with gr.Tabs():
        # --- Text tab --------------------------------------------------------
        with gr.Tab("Text"):
            # Placeholder mapping from the last text redaction.
            t_map = gr.State({})
            with gr.Row():
                with gr.Column():
                    t_in = gr.Textbox(
                        label="Confidential text",
                        placeholder="Paste a document, message, or record...",
                        lines=10,
                        value=EXAMPLE,
                    )
                    t_labels = gr.Dropdown(
                        label="PII types", choices=DEFAULT_LABELS,
                        value=DEFAULT_LABELS, multiselect=True,
                    )
                    t_thr = gr.Slider(0.1, 0.9, value=0.45, step=0.05, label="Threshold")
                    t_btn = gr.Button("Redact", variant="primary")
                with gr.Column():
                    t_out = gr.Textbox(
                        label="✅ Safe output — copy this into your LLM (no raw PII)",
                        lines=10,
                        **_COPY,
                    )
                    t_sum = gr.Markdown()
                    t_tab = gr.Dataframe(
                        headers=["type", "value", "score", "source"],
                        label="Detected PII", wrap=True,
                    )
            with gr.Accordion("Rehydrate (local only)", open=False):
                t_reh_btn = gr.Button("Restore original values")
                t_reh = gr.Textbox(label="Rehydrated", lines=6)
            # Output order mirrors run_text's return tuple:
            # (redacted, table, summary, mapping).
            t_btn.click(run_text, [t_in, t_labels, t_thr], [t_out, t_tab, t_sum, t_map])
            # Rehydration reads the current content of the safe-output box.
            t_reh_btn.click(do_rehydrate, [t_out, t_map], [t_reh])
        # --- Image tab -------------------------------------------------------
        with gr.Tab("Image"):
            gr.Markdown(
                "Upload a document or ID image. A local vision model "
                "(MiniCPM-V-4.5) extracts the fields, then Redac redacts them."
            )
            # Placeholder mapping from the last image redaction.
            i_map = gr.State({})
            with gr.Row():
                with gr.Column():
                    i_in = gr.Image(label="Document / ID image", type="pil")
                    i_labels = gr.Dropdown(
                        label="PII types", choices=DEFAULT_LABELS,
                        value=DEFAULT_LABELS, multiselect=True,
                    )
                    i_thr = gr.Slider(0.1, 0.9, value=0.45, step=0.05, label="Threshold")
                    i_btn = gr.Button("Extract & redact", variant="primary")
                with gr.Column():
                    i_extracted = gr.Textbox(
                        label="🔒 Extracted fields (raw — stays local, never sent)",
                        lines=8,
                    )
                    i_out = gr.Textbox(
                        label="✅ Safe output — copy this into your LLM (no raw PII)",
                        lines=8,
                        **_COPY,
                    )
                    i_sum = gr.Markdown()
                    i_tab = gr.Dataframe(
                        headers=["type", "value", "score", "source"],
                        label="Detected PII", wrap=True,
                    )
            with gr.Accordion("Rehydrate (local only)", open=False):
                i_reh_btn = gr.Button("Restore original values")
                i_reh = gr.Textbox(label="Rehydrated", lines=6)
            # Output order mirrors run_image's return tuple:
            # (extracted, redacted, table, summary, mapping).
            i_btn.click(
                run_image, [i_in, i_labels, i_thr],
                [i_extracted, i_out, i_tab, i_sum, i_map],
            )
            i_reh_btn.click(do_rehydrate, [i_out, i_map], [i_reh])
# Script entry point: serve the app on all interfaces. The port comes from
# GRADIO_SERVER_PORT when set, otherwise 7860.
if __name__ == "__main__":
    import os

    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.getenv("GRADIO_SERVER_PORT", "7860")),
    )