File size: 6,628 Bytes
45a3a86
 
2414447
 
 
 
 
 
 
 
45a3a86
 
840b51e
 
2414447
 
 
 
 
 
 
fa40826
 
062f9b8
 
 
 
 
 
 
 
 
 
fa40826
 
 
 
 
 
45a3a86
 
 
 
 
 
 
840b51e
2414447
45a3a86
 
 
 
 
840b51e
 
2414447
 
 
 
 
 
 
 
 
 
 
 
45a3a86
 
 
 
 
 
f8c4552
45a3a86
 
062f9b8
 
 
 
45a3a86
 
2414447
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
062f9b8
 
 
 
 
2414447
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
988ede0
45a3a86
2414447
 
 
 
 
 
 
 
 
 
 
062f9b8
 
 
 
 
 
 
 
 
2414447
 
 
 
 
 
 
 
 
 
 
 
45a3a86
2414447
840b51e
 
 
988ede0
 
 
f8c4552
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
"""Redac β€” a local privacy gateway.

Two entry points, one redaction core:
  - Text:  paste confidential text -> detect PII -> reversibly redact.
  - Image: upload a document/ID image -> a local vision model extracts the
           fields -> the extracted text is redacted the same way.

The redacted text is what you would safely hand to a downstream LLM; the
mapping stays local so any answer can be rehydrated. All compute runs
in-Space (vision model on ZeroGPU). No external API calls.
"""

import gradio as gr

from redac import (
    detect_entities,
    redact,
    rehydrate,
    extract_text_from_image,
    DEFAULT_LABELS,
)
from redac.detect import gliner_available

import inspect

# Extra Textbox kwargs for the "safe output" boxes. Gradio 5.x has a
# show_copy_button kwarg; in 6.x copying is the default and the kwarg was
# removed, so it is only added when the installed Textbox constructor still
# accepts it. This lets the same app run on both.
_COPY = {}
if "show_copy_button" in inspect.signature(gr.Textbox.__init__).parameters:
    _COPY["show_copy_button"] = True

# Markdown warning appended to the page header. It is empty when
# gliner_available() reports true; otherwise it tells the user that only
# regex detection is active.
if gliner_available():
    _NER_NOTE = ""
else:
    _NER_NOTE = (
        "\n\n> ⚠️ GLiNER not installed: running **regex-only** (structured "
        "IDs like email/phone/IBAN). Names, addresses and other free-text PII "
        "need GLiNER, which is available on the Space."
    )

# Sample record pre-filled into the Text tab's input box so the demo has
# something to redact out of the box. Built sentence by sentence; the pieces
# are joined with single spaces.
EXAMPLE = " ".join(
    [
        "Patient John A. Doe, DOB 1985-04-12, was admitted on 2026-06-01.",
        "Contact: john.doe@example.com, +49 151 23456789.",
        "Insurance ID 123-45-6789, IBAN DE89370400440532013000.",
    ]
)


def _redact_text(text, labels, threshold):
    """Detect PII in *text*, redact it, and build the values the UI displays.

    Args:
        text: The raw text to scan.
        labels: PII types to look for; a falsy value (e.g. an empty
            selection) falls back to ``DEFAULT_LABELS``.
        threshold: Passed through to ``detect_entities`` as ``threshold``.

    Returns:
        A 4-tuple ``(redacted, rows, summary, mapping)``: the redacted text
        and mapping as returned by ``redact``, one
        ``[label, text, score, source]`` row per detected entity (score
        formatted to two decimals), and a one-line summary string.
    """
    active_labels = labels if labels else DEFAULT_LABELS
    found = detect_entities(text, labels=active_labels, threshold=threshold)
    safe_text, placeholder_map = redact(text, found)

    # One display row per detected entity, in detection order.
    rows = []
    for ent in found:
        rows.append([ent.label, ent.text, format(ent.score, ".2f"), ent.source])

    span_count = len(found)
    placeholder_count = len(placeholder_map)
    report = f"{span_count} PII span(s) redacted into {placeholder_count} placeholder(s)."
    return safe_text, rows, report, placeholder_map


def run_text(text, labels, threshold):
    """Click handler for the Text tab: redact the pasted text.

    Returns the ``(redacted, table, summary, mapping)`` tuple produced by
    ``_redact_text`` for the same arguments.
    """
    redacted, table, summary, mapping = _redact_text(text, labels, threshold)
    return redacted, table, summary, mapping


def run_image(image, labels, threshold):
    """Click handler for the Image tab: extract text from an image, then redact it.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
        labels: PII types to look for (forwarded to ``_redact_text``).
        threshold: Detection threshold (forwarded to ``_redact_text``).

    Returns:
        A 5-tuple ``(extracted, redacted, table, summary, mapping)``. When no
        image was supplied or nothing could be extracted, the text fields are
        empty, the table and mapping are empty, and the summary says so.
    """
    # Same shape as the success path, so every bound output is reset.
    nothing = ("", "", [], "No image / nothing extracted.", {})

    # Skip the vision model entirely when there is no image to look at,
    # rather than relying on the extractor to cope with None.
    if image is None:
        return nothing

    extracted = extract_text_from_image(image)
    if not extracted:
        return nothing

    redacted, table, summary, mapping = _redact_text(extracted, labels, threshold)
    return extracted, redacted, table, summary, mapping


def do_rehydrate(redacted_text, mapping):
    """Click handler for the "Restore original values" buttons.

    Delegates to ``rehydrate(redacted_text, mapping)`` when a mapping is
    available. With an empty or missing mapping there is nothing to restore
    from, so a hint asking the user to run a redaction is returned instead.
    """
    if mapping:
        return rehydrate(redacted_text, mapping)
    return "Run a redaction first."


# UI definition. Two tabs (Text and Image) share the same layout: inputs on
# the left, redacted output + detection table + rehydrate accordion on the
# right. Each tab keeps its own mapping in a gr.State, so the rehydrate button
# uses the mapping produced by that tab's most recent run.
with gr.Blocks(title="Redac", theme=gr.themes.Soft()) as demo:
    # NOTE(review): the heading emoji was garbled in the source encoding; it is
    # restored here as the crayon emoji — confirm against the original design.
    gr.Markdown(
        "# 🖍️ Redac\n"
        "**A local privacy gateway.** Upload a document or paste text, Redac "
        "extracts and redacts the PII *locally*, and you copy the safe output to "
        "use with any LLM. Raw values never leave: the mapping stays local so you "
        "can rehydrate answers yourself." + _NER_NOTE
    )

    with gr.Tabs():
        # --- Text tab --------------------------------------------------------
        with gr.Tab("Text"):
            # Mapping from the last text redaction; fed to do_rehydrate.
            t_map = gr.State({})
            with gr.Row():
                with gr.Column():
                    t_in = gr.Textbox(
                        label="Confidential text",
                        placeholder="Paste a document, message, or record...",
                        lines=10,
                        value=EXAMPLE,
                    )
                    t_labels = gr.Dropdown(
                        label="PII types", choices=DEFAULT_LABELS,
                        value=DEFAULT_LABELS, multiselect=True,
                    )
                    t_thr = gr.Slider(0.1, 0.9, value=0.45, step=0.05, label="Threshold")
                    t_btn = gr.Button("Redact", variant="primary")
                with gr.Column():
                    t_out = gr.Textbox(
                        label="✅ Safe output — copy this into your LLM (no raw PII)",
                        lines=10,
                        **_COPY,
                    )
                    t_sum = gr.Markdown()
                    t_tab = gr.Dataframe(
                        headers=["type", "value", "score", "source"],
                        label="Detected PII", wrap=True,
                    )
                    with gr.Accordion("Rehydrate (local only)", open=False):
                        t_reh_btn = gr.Button("Restore original values")
                        t_reh = gr.Textbox(label="Rehydrated", lines=6)

            # Output order matches run_text's return tuple:
            # (redacted, table, summary, mapping).
            t_btn.click(run_text, [t_in, t_labels, t_thr], [t_out, t_tab, t_sum, t_map])
            # Rehydrates whatever is currently in the safe-output box.
            t_reh_btn.click(do_rehydrate, [t_out, t_map], [t_reh])

        # --- Image tab -------------------------------------------------------
        with gr.Tab("Image"):
            gr.Markdown(
                "Upload a document or ID image. A local vision model "
                "(MiniCPM-V-4.5) extracts the fields, then Redac redacts them."
            )
            # Mapping from the last image redaction; fed to do_rehydrate.
            i_map = gr.State({})
            with gr.Row():
                with gr.Column():
                    i_in = gr.Image(label="Document / ID image", type="pil")
                    i_labels = gr.Dropdown(
                        label="PII types", choices=DEFAULT_LABELS,
                        value=DEFAULT_LABELS, multiselect=True,
                    )
                    i_thr = gr.Slider(0.1, 0.9, value=0.45, step=0.05, label="Threshold")
                    i_btn = gr.Button("Extract & redact", variant="primary")
                with gr.Column():
                    i_extracted = gr.Textbox(
                        label="🔒 Extracted fields (raw — stays local, never sent)",
                        lines=8,
                    )
                    i_out = gr.Textbox(
                        label="✅ Safe output — copy this into your LLM (no raw PII)",
                        lines=8,
                        **_COPY,
                    )
                    i_sum = gr.Markdown()
                    i_tab = gr.Dataframe(
                        headers=["type", "value", "score", "source"],
                        label="Detected PII", wrap=True,
                    )
                    with gr.Accordion("Rehydrate (local only)", open=False):
                        i_reh_btn = gr.Button("Restore original values")
                        i_reh = gr.Textbox(label="Rehydrated", lines=6)

            # Output order matches run_image's return tuple:
            # (extracted, redacted, table, summary, mapping).
            i_btn.click(
                run_image, [i_in, i_labels, i_thr],
                [i_extracted, i_out, i_tab, i_sum, i_map],
            )
            i_reh_btn.click(do_rehydrate, [i_out, i_map], [i_reh])


if __name__ == "__main__":
    import os

    # Host and port are both taken from the environment, with defaults that
    # match the previous hard-coded behaviour. 0.0.0.0 listens on every
    # interface; set GRADIO_SERVER_NAME=127.0.0.1 to keep a desktop run
    # reachable from the local machine only.
    host = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
    port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
    demo.launch(server_name=host, server_port=port)