"""Redac — a local privacy gateway.
Two entry points, one redaction core:
- Text: paste confidential text -> detect PII -> reversibly redact.
- Image: upload a document/ID image -> a local vision model extracts the
fields -> the extracted text is redacted the same way.
The redacted text is what you would safely hand to a downstream LLM; the
mapping stays local so any answer can be rehydrated. All compute runs
in-Space (vision model on ZeroGPU). No external API calls.
"""
import gradio as gr
from redac import (
detect_entities,
redact,
rehydrate,
extract_text_from_image,
DEFAULT_LABELS,
)
from redac.detect import gliner_available
import inspect
# Extra Textbox kwargs for the "safe output" boxes. Per the original note,
# show_copy_button exists in Gradio 5.x, while in 6.x copying is the default
# and the kwarg was removed. The flag is therefore included only when the
# installed Textbox constructor actually declares it, so the app runs on both.
_COPY = {
    kwarg: True
    for kwarg in ("show_copy_button",)
    if kwarg in inspect.signature(gr.Textbox.__init__).parameters
}
# Markdown warning appended to the page intro. Empty when gliner_available()
# reports that GLiNER can be used; otherwise explains the regex-only fallback.
# The leading warning glyph had been mis-decoded (UTF-8 bytes read through a
# single-byte codepage) and is restored to the intended emoji here.
_NER_NOTE = (
    ""
    if gliner_available()
    else (
        "\n\n> ⚠️ GLiNER not installed: running **regex-only** (structured "
        "IDs like email/phone/IBAN). Names, addresses and other free-text PII "
        "need GLiNER, which is available on the Space."
    )
)
# Sample record used to pre-fill the text input so the demo works on first click.
EXAMPLE = " ".join(
    (
        "Patient John A. Doe, DOB 1985-04-12, was admitted on 2026-06-01.",
        "Contact: john.doe@example.com, +49 151 23456789.",
        "Insurance ID 123-45-6789, IBAN DE89370400440532013000.",
    )
)
def _redact_text(text, labels, threshold):
    """Detect PII in *text* and redact it; shared by the text and image tabs.

    Args:
        text: The raw text to scan.
        labels: PII labels to look for; a falsy value (e.g. an empty
            selection) falls back to ``DEFAULT_LABELS``.
        threshold: Passed through to ``detect_entities`` unchanged.

    Returns:
        A 4-tuple ``(redacted, table, summary, mapping)``: the redacted text,
        one ``[label, text, score, source]`` row per detected entity (score
        formatted to two decimals), a one-line summary, and the mapping
        returned by ``redact``.
    """
    active_labels = labels if labels else DEFAULT_LABELS
    found = detect_entities(text, labels=active_labels, threshold=threshold)
    safe_text, placeholder_map = redact(text, found)

    rows = []
    for ent in found:
        rows.append([ent.label, ent.text, f"{ent.score:.2f}", ent.source])

    note = f"{len(found)} PII span(s) redacted into {len(placeholder_map)} placeholder(s)."
    return safe_text, rows, note, placeholder_map
def run_text(text, labels, threshold):
    """Click handler for the Text tab: redact the pasted text.

    Returns the ``(redacted, table, summary, mapping)`` tuple produced by
    ``_redact_text``, in the order the Text tab's outputs are wired.
    """
    redacted, table, summary, mapping = _redact_text(text, labels, threshold)
    return redacted, table, summary, mapping
def run_image(image, labels, threshold):
    """Click handler for the Image tab: extract text from *image*, then redact it.

    Returns a 5-tuple ``(extracted, redacted, table, summary, mapping)``.
    When extraction yields nothing (falsy result), the text fields are empty,
    the table and mapping are empty, and the summary reports that nothing was
    extracted.
    """
    raw_text = extract_text_from_image(image)
    if raw_text:
        safe_text, rows, note, placeholder_map = _redact_text(raw_text, labels, threshold)
        return raw_text, safe_text, rows, note, placeholder_map
    # Nothing to redact: hand back empty values for every output component.
    return "", "", [], "No image / nothing extracted.", {}
def do_rehydrate(redacted_text, mapping):
    """Restore original values in *redacted_text* using *mapping*.

    If *mapping* is empty or missing (no redaction has been run yet), a short
    hint string is returned instead of calling ``rehydrate``.
    """
    if mapping:
        return rehydrate(redacted_text, mapping)
    return "Run a redaction first."
# Gradio UI: a page intro followed by two tabs (Text, Image). Each tab has its
# own inputs, a "safe output" box, a table of detected PII, a per-tab gr.State
# holding the redaction mapping, and a rehydrate section.
#
# NOTE(review): this block was recovered from a copy whose indentation and
# non-ASCII characters were damaged. The nesting below is reconstructed from
# statement order; in particular the placement of each "Rehydrate" accordion
# inside the right-hand column is an assumption - confirm against the running
# layout. Dashes and the check-mark emoji in labels were decoded back from
# their mis-encoded form, which also repairs two labels that had been split
# across lines (an unterminated string literal).
with gr.Blocks(title="Redac", theme=gr.themes.Soft()) as demo:
    # NOTE(review): the glyph before "Redac" in the heading is mis-encoded and
    # the intended emoji cannot be recovered from this copy - restore it from
    # the original file.
    gr.Markdown(
        "# ποΈ Redac\n"
        "**A local privacy gateway.** Upload a document or paste text, Redac "
        "extracts and redacts the PII *locally*, and you copy the safe output to "
        "use with any LLM. Raw values never leave: the mapping stays local so you "
        "can rehydrate answers yourself." + _NER_NOTE
    )

    with gr.Tabs():
        # --- Text tab --------------------------------------------------------
        with gr.Tab("Text"):
            # Per-session placeholder -> original-value mapping from the last run.
            t_map = gr.State({})
            with gr.Row():
                with gr.Column():
                    t_in = gr.Textbox(
                        label="Confidential text",
                        placeholder="Paste a document, message, or record...",
                        lines=10,
                        value=EXAMPLE,
                    )
                    t_labels = gr.Dropdown(
                        label="PII types",
                        choices=DEFAULT_LABELS,
                        value=DEFAULT_LABELS,
                        multiselect=True,
                    )
                    t_thr = gr.Slider(0.1, 0.9, value=0.45, step=0.05, label="Threshold")
                    t_btn = gr.Button("Redact", variant="primary")
                with gr.Column():
                    t_out = gr.Textbox(
                        label="✅ Safe output — copy this into your LLM (no raw PII)",
                        lines=10,
                        **_COPY,
                    )
                    t_sum = gr.Markdown()
                    t_tab = gr.Dataframe(
                        headers=["type", "value", "score", "source"],
                        label="Detected PII",
                        wrap=True,
                    )
                    with gr.Accordion("Rehydrate (local only)", open=False):
                        t_reh_btn = gr.Button("Restore original values")
                        t_reh = gr.Textbox(label="Rehydrated", lines=6)

            # Output order matches the 4-tuple returned by run_text.
            t_btn.click(run_text, [t_in, t_labels, t_thr], [t_out, t_tab, t_sum, t_map])
            t_reh_btn.click(do_rehydrate, [t_out, t_map], [t_reh])

        # --- Image tab -------------------------------------------------------
        with gr.Tab("Image"):
            gr.Markdown(
                "Upload a document or ID image. A local vision model "
                "(MiniCPM-V-4.5) extracts the fields, then Redac redacts them."
            )
            # Kept separate from the Text tab's mapping so the tabs do not
            # overwrite each other's state.
            i_map = gr.State({})
            with gr.Row():
                with gr.Column():
                    i_in = gr.Image(label="Document / ID image", type="pil")
                    i_labels = gr.Dropdown(
                        label="PII types",
                        choices=DEFAULT_LABELS,
                        value=DEFAULT_LABELS,
                        multiselect=True,
                    )
                    i_thr = gr.Slider(0.1, 0.9, value=0.45, step=0.05, label="Threshold")
                    i_btn = gr.Button("Extract & redact", variant="primary")
                with gr.Column():
                    # NOTE(review): the leading glyph of this label is
                    # mis-encoded and the intended emoji cannot be recovered
                    # from this copy - restore it from the original file.
                    i_extracted = gr.Textbox(
                        label="π Extracted fields (raw — stays local, never sent)",
                        lines=8,
                    )
                    i_out = gr.Textbox(
                        label="✅ Safe output — copy this into your LLM (no raw PII)",
                        lines=8,
                        **_COPY,
                    )
                    i_sum = gr.Markdown()
                    i_tab = gr.Dataframe(
                        headers=["type", "value", "score", "source"],
                        label="Detected PII",
                        wrap=True,
                    )
                    with gr.Accordion("Rehydrate (local only)", open=False):
                        i_reh_btn = gr.Button("Restore original values")
                        i_reh = gr.Textbox(label="Rehydrated", lines=6)

            # Output order matches the 5-tuple returned by run_image.
            i_btn.click(
                run_image,
                [i_in, i_labels, i_thr],
                [i_extracted, i_out, i_tab, i_sum, i_map],
            )
            i_reh_btn.click(do_rehydrate, [i_out, i_map], [i_reh])
if __name__ == "__main__":
    import os

    # Listen on all interfaces; the port comes from GRADIO_SERVER_PORT when
    # set, otherwise 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("GRADIO_SERVER_PORT", "7860")),
    )
|