Spaces:
Running on Zero
Running on Zero
"""Redac — a local privacy gateway.

Two entry points, one redaction core:

- Text: paste confidential text -> detect PII -> reversibly redact.
- Image: upload a document/ID image -> a local vision model extracts the
  fields -> the extracted text is redacted the same way.

The redacted text is what you would safely hand to a downstream LLM; the
mapping stays local so any answer can be rehydrated. All compute runs
in-Space (vision model on ZeroGPU). No external API calls.
"""
| import gradio as gr | |
| from redac import ( | |
| detect_entities, | |
| redact, | |
| rehydrate, | |
| extract_text_from_image, | |
| DEFAULT_LABELS, | |
| ) | |
| from redac.detect import gliner_available | |
| import inspect | |
# Gradio 5.x accepts ``show_copy_button`` on ``gr.Textbox``; in 6.x copying is
# the default and the keyword was removed. Start from no extra kwargs and add
# the flag only when the constructor still declares it, so the app runs on both.
_COPY = {}
if "show_copy_button" in inspect.signature(gr.Textbox.__init__).parameters:
    _COPY["show_copy_button"] = True
# Markdown warning appended to the page header when GLiNER is not available,
# telling the user that only the regex detectors are active. Empty string when
# GLiNER is installed. The leading warning sign was mis-decoded in the source
# and is restored here.
_NER_NOTE = (
    ""
    if gliner_available()
    else "\n\n> ⚠️ GLiNER not installed: running **regex-only** (structured "
    "IDs like email/phone/IBAN). Names, addresses and other free-text PII "
    "need GLiNER, which is available on the Space."
)
# Sample record used as the initial value of the "Confidential text" box.
# The three sentences are joined with single spaces into one line of text.
EXAMPLE = " ".join(
    (
        "Patient John A. Doe, DOB 1985-04-12, was admitted on 2026-06-01.",
        "Contact: john.doe@example.com, +49 151 23456789.",
        "Insurance ID 123-45-6789, IBAN DE89370400440532013000.",
    )
)
def _redact_text(text, labels, threshold):
    """Detect PII in *text* and redact it; shared by the text and image paths.

    An empty (falsy) *labels* selection falls back to ``DEFAULT_LABELS``.

    Returns a 4-tuple ``(redacted, table, summary, mapping)``:

    - ``redacted``: the text returned by ``redact``.
    - ``table``: one ``[label, text, score, source]`` row per detected
      entity, with the score formatted to two decimals.
    - ``summary``: a one-line count of spans and placeholders.
    - ``mapping``: the mapping returned by ``redact``.
    """
    active_labels = labels if labels else DEFAULT_LABELS
    found = detect_entities(text, labels=active_labels, threshold=threshold)
    safe_text, placeholder_map = redact(text, found)

    rows = []
    for ent in found:
        rows.append([ent.label, ent.text, f"{ent.score:.2f}", ent.source])

    span_count = len(found)
    placeholder_count = len(placeholder_map)
    summary = (
        f"{span_count} PII span(s) redacted into "
        f"{placeholder_count} placeholder(s)."
    )
    return safe_text, rows, summary, placeholder_map
def run_text(text, labels, threshold):
    """Handle the Text tab's "Redact" button.

    Passes the inputs straight to ``_redact_text`` and returns its
    ``(redacted, table, summary, mapping)`` result unchanged.
    """
    redacted, table, summary, mapping = _redact_text(text, labels, threshold)
    return redacted, table, summary, mapping
def run_image(image, labels, threshold):
    """Handle the Image tab's "Extract & redact" button.

    Extracts text from *image* with ``extract_text_from_image`` and redacts
    it with ``_redact_text``.

    Returns a 5-tuple ``(extracted, redacted, table, summary, mapping)``.
    When no image was supplied, or nothing could be extracted, the result is
    empty values plus the message ``"No image / nothing extracted."``.
    """
    # With no upload the image value is None; skip the vision model entirely
    # instead of relying on the extractor to cope with a missing image.
    extracted = extract_text_from_image(image) if image is not None else ""
    if not extracted:
        return "", "", [], "No image / nothing extracted.", {}

    redacted, table, summary, mapping = _redact_text(extracted, labels, threshold)
    return extracted, redacted, table, summary, mapping
def do_rehydrate(redacted_text, mapping):
    """Restore the original values in *redacted_text* using *mapping*.

    Returns the output of ``rehydrate`` when a mapping is available;
    otherwise returns the hint ``"Run a redaction first."``.
    """
    if mapping:
        return rehydrate(redacted_text, mapping)
    return "Run a redaction first."
# UI definition: one Blocks app with a "Text" tab and an "Image" tab. Each tab
# keeps its own placeholder mapping in a gr.State so its rehydrate button can
# restore the values from that tab's latest redaction.
#
# NOTE(review): the source this was recovered from had lost its indentation and
# had mis-decoded non-ASCII characters. Em dashes and the check mark are
# restored; the heading emoji and the "Extracted fields" emoji could not be
# recovered and are best guesses. Nesting of the accordions (placed below the
# two-column row) is likewise reconstructed. Confirm all three.
with gr.Blocks(title="Redac", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# 🗝️ Redac\n"
        "**A local privacy gateway.** Upload a document or paste text, Redac "
        "extracts and redacts the PII *locally*, and you copy the safe output to "
        "use with any LLM. Raw values never leave: the mapping stays local so you "
        "can rehydrate answers yourself." + _NER_NOTE
    )
    with gr.Tabs():
        # --- Text tab --------------------------------------------------------
        with gr.Tab("Text"):
            # Placeholder -> original mapping from the last text redaction.
            t_map = gr.State({})
            with gr.Row():
                with gr.Column():
                    t_in = gr.Textbox(
                        label="Confidential text",
                        placeholder="Paste a document, message, or record...",
                        lines=10,
                        value=EXAMPLE,
                    )
                    t_labels = gr.Dropdown(
                        label="PII types",
                        choices=DEFAULT_LABELS,
                        value=DEFAULT_LABELS,
                        multiselect=True,
                    )
                    t_thr = gr.Slider(0.1, 0.9, value=0.45, step=0.05, label="Threshold")
                    t_btn = gr.Button("Redact", variant="primary")
                with gr.Column():
                    t_out = gr.Textbox(
                        label="✅ Safe output — copy this into your LLM (no raw PII)",
                        lines=10,
                        **_COPY,
                    )
                    t_sum = gr.Markdown()
                    t_tab = gr.Dataframe(
                        headers=["type", "value", "score", "source"],
                        label="Detected PII",
                        wrap=True,
                    )
            with gr.Accordion("Rehydrate (local only)", open=False):
                t_reh_btn = gr.Button("Restore original values")
                t_reh = gr.Textbox(label="Rehydrated", lines=6)
            # Output order matches run_text's (redacted, table, summary, mapping).
            t_btn.click(run_text, [t_in, t_labels, t_thr], [t_out, t_tab, t_sum, t_map])
            t_reh_btn.click(do_rehydrate, [t_out, t_map], [t_reh])
        # --- Image tab -------------------------------------------------------
        with gr.Tab("Image"):
            gr.Markdown(
                "Upload a document or ID image. A local vision model "
                "(MiniCPM-V-4.5) extracts the fields, then Redac redacts them."
            )
            # Placeholder -> original mapping from the last image redaction.
            i_map = gr.State({})
            with gr.Row():
                with gr.Column():
                    i_in = gr.Image(label="Document / ID image", type="pil")
                    i_labels = gr.Dropdown(
                        label="PII types",
                        choices=DEFAULT_LABELS,
                        value=DEFAULT_LABELS,
                        multiselect=True,
                    )
                    i_thr = gr.Slider(0.1, 0.9, value=0.45, step=0.05, label="Threshold")
                    i_btn = gr.Button("Extract & redact", variant="primary")
                with gr.Column():
                    i_extracted = gr.Textbox(
                        label="🔒 Extracted fields (raw — stays local, never sent)",
                        lines=8,
                    )
                    i_out = gr.Textbox(
                        label="✅ Safe output — copy this into your LLM (no raw PII)",
                        lines=8,
                        **_COPY,
                    )
                    i_sum = gr.Markdown()
                    i_tab = gr.Dataframe(
                        headers=["type", "value", "score", "source"],
                        label="Detected PII",
                        wrap=True,
                    )
            with gr.Accordion("Rehydrate (local only)", open=False):
                i_reh_btn = gr.Button("Restore original values")
                i_reh = gr.Textbox(label="Rehydrated", lines=6)
            # Output order matches run_image's
            # (extracted, redacted, table, summary, mapping).
            i_btn.click(
                run_image,
                [i_in, i_labels, i_thr],
                [i_extracted, i_out, i_tab, i_sum, i_map],
            )
            i_reh_btn.click(do_rehydrate, [i_out, i_map], [i_reh])
if __name__ == "__main__":
    import os

    # Port comes from GRADIO_SERVER_PORT when set, otherwise 7860; the server
    # binds on all interfaces.
    port = int(os.getenv("GRADIO_SERVER_PORT", "7860"))
    demo.launch(
        server_name="0.0.0.0",
        server_port=port,
    )