""" Hush ==== A local privacy airlock for cloud AI. Paste text, and Hush scrubs out the sensitive parts (names, emails, phones, cards, IDs, places) into placeholders like [[NAME_1]]. Use the safe version anywhere. When you bring the AI's reply back, Hush pours your real values back in, from a key that stays in your session and is never logged. Track 1 style tool for the "Small Models Big Adventures" hackathon. Model: openbmb/MiniCPM5-1B (1B, on-device, well under the 32B ceiling). Why a SMALL model is the point: redacting secrets is the one job you must not hand to a remote service, because that is the leak you are trying to prevent. A 1B model is small enough to run entirely on your own machine, so nothing has to leave it. Engineering: a deterministic layer (validated regex, Luhn-checked cards, range-checked IPs) guarantees the structured PII no matter what. The small model only ADDS contextual entities, and it can only ever redact strings that literally appear in your text, so it cannot hallucinate secrets into existence. With no model (keeper mode) the deterministic layer still does the whole structured job, so the tool is always useful. """ import os import re import html import inspect import gradio as gr _BLOCKS_HAS_CSS = "css" in inspect.signature(gr.Blocks.__init__).parameters _LAUNCH_HAS_SSR = "ssr_mode" in inspect.signature(gr.Blocks.launch).parameters _TB_HAS_COPY = "show_copy_button" in inspect.signature(gr.Textbox.__init__).parameters _COPY = {"show_copy_button": True} if _TB_HAS_COPY else {} MODEL_ID = os.environ.get("HUSH_MODEL", "openbmb/MiniCPM5-1B") DEBUG = os.environ.get("HUSH_DEBUG", "").strip().lower() in {"1", "true", "yes"} MAX_CHARS = int(os.environ.get("HUSH_MAX_CHARS", "6000")) MODEL_CHARS = 3000 # cap what we hand the model, for speed # ----------------------------------------------------------- redaction core ---- TYPE_LABEL = {"URL": "LINK", "EMAIL": "EMAIL", "SSN": "SSN", "IP": "IP", "CARD": "CARD", "PHONE": "PHONE", "PERSON": "NAME", "ORG": "ORG", "LOCATION": "PLACE", "AGE": "AGE", "ID": "ID"} PLACEHOLDER_RE = re.compile(r"\[\[([A-Z]+)_(\d+)\]\]") DETECTORS = [ ("URL", re.compile(r"\bhttps?://[^\s<>()\[\]]+", re.I)), ("EMAIL", re.compile(r"\b[\w.+-]+@[\w-]+\.[\w.-]+\b")), ("SSN", re.compile(r"\b\d{3}-\d{2}-\d{4}\b")), ("IP", re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")), ("CARD", re.compile(r"\b(?:\d[ -]?){13,19}\b")), ("PHONE", re.compile( r"(? 9: d -= 9 total += d return total % 10 == 0 def _valid_ipv4(s): parts = s.split(".") return len(parts) == 4 and all(p.isdigit() and 0 <= int(p) <= 255 for p in parts) def find_spans(text): spans = [] occupied = [False] * len(text) def free(a, b): return not any(occupied[a:b]) def occupy(a, b): for i in range(a, b): occupied[i] = True for typ, rx in DETECTORS: for mt in rx.finditer(text): a, b, val = mt.start(), mt.end(), mt.group() if typ in ("URL", "EMAIL"): # trim trailing punctuation while b > a and text[b - 1] in _TRAIL: b -= 1 val = text[a:b] if typ == "IP" and not _valid_ipv4(val): continue if typ == "CARD" and not _luhn_ok(val): continue if b <= a or not free(a, b): continue spans.append((a, b, typ)) occupy(a, b) return spans, occupied def _add_model_spans(text, spans, occupied, entities): for typ, val in entities: val = (val or "").strip().strip('"').strip() if len(val) < 2: continue pat = re.escape(val) if val[0].isalnum(): pat = r"(?.*?", "", raw or "", flags=re.S | re.I) raw = re.sub(r".*$", "", raw, flags=re.S | re.I) ents, seen = [], set() for line in raw.splitlines(): mt = re.match(r"\s*(PERSON|ORG|LOCATION|AGE|ID)\s*[:\-]\s*(.+)", line, re.I) if not mt: continue typ = mt.group(1).upper() val = mt.group(2).strip().strip('"').strip() if val and val.upper() != "NONE" and (typ, val.lower()) not in seen: seen.add((typ, val.lower())) ents.append((typ, val)) return ents # --------------------------------------------------------------- rendering ----- def esc(s): return html.escape(str(s)) def render_summary(mapping, note=""): if not mapping: return ('
No obvious secrets found. The text is unchanged. ' 'Still, glance over it yourself before you share it.
') counts = {} for ph in mapping: lab = ph[2:-2].rsplit("_", 1)[0] counts[lab] = counts.get(lab, 0) + 1 chips = "".join(f'{esc(k)} · {v}' for k, v in sorted(counts.items())) rows = "".join(f'{esc(ph)}{esc(orig)}' for ph, orig in mapping.items()) return (f'
Hushed {len(mapping)} item(s)
' f'
{chips}
' f'
show what was hidden (stays on this page only)' f'' f'{rows}
placeholderyour original
{note}
') def on_scrub(text, _state): text = text or "" if not text.strip(): return "", ('
Paste some text above, then press Hush.
'), {} note = "" if len(text) > MAX_CHARS: text = text[:MAX_CHARS] note = (f'
Only the first {MAX_CHARS} characters were ' f'processed.
') entities = [] if MODE == "model": try: entities = _model_entities(text) except Exception as exc: # noqa: BLE001 print(f"[Hush] model error: {exc}") entities = [] redacted, mapping = redact(text, entities) return redacted, render_summary(mapping, note), mapping def on_restore(ai_text, mapping): restored = restore(ai_text or "", mapping or {}) return restored CSS = """ @import url('https://fonts.googleapis.com/css2?family=Fraunces:ital,opsz,wght@0,9..144,400;0,9..144,600;1,9..144,400&family=Spectral:ital,wght@0,400;0,500;1,400&display=swap'); :root{--paper:#eef1ee;--paper-2:#e3e8e4;--ink:#26302c;--ink-soft:#5c6b63; --teal:#2f5d57;--amber:#9a6b2f;--line:#bcc8c1;--ok:#2f5d57;} .gradio-container,.gradio-container.dark,.dark{ --body-background-fill:transparent;--background-fill-primary:#fbfdfb;--background-fill-secondary:#eef3ef; --block-background-fill:#fbfdfb;--block-border-color:var(--line);--border-color-primary:var(--line); --body-text-color:var(--ink);--body-text-color-subdued:var(--ink-soft); --block-label-text-color:var(--teal);--block-title-text-color:var(--ink); --block-label-background-fill:#e3ebe5;--block-title-background-fill:transparent; --input-background-fill:#fbfdfb;--input-border-color:var(--line);--input-placeholder-color:var(--ink-soft); --button-primary-background-fill:var(--teal);--button-primary-background-fill-hover:#244641; --button-primary-text-color:#f4f8f5;--button-primary-border-color:#244641; --button-secondary-background-fill:#e3ebe5;--button-secondary-background-fill-hover:#d6e0d9; --button-secondary-text-color:var(--ink);--button-secondary-border-color:var(--line); --color-accent:var(--amber);--color-accent-soft:#efe3cd;} .gradio-container{background:radial-gradient(120% 80% at 80% -10%,#f3f6f3,var(--paper) 55%,var(--paper-2)); font-family:'Spectral',Georgia,serif !important;color:var(--ink) !important;max-width:920px !important;} .gradio-container textarea,.gradio-container input[type="text"],.gradio-container input:not([type]){ background:#fbfdfb !important;color:var(--ink) !important;-webkit-text-fill-color:var(--ink) !important;border-color:var(--line) !important;font-family:ui-monospace,Menlo,Consolas,monospace !important;} .gradio-container textarea::placeholder{color:var(--ink-soft) !important;-webkit-text-fill-color:var(--ink-soft) !important;opacity:1;} .hu-title{font-family:'Fraunces',serif;font-weight:600;font-size:2.5rem;line-height:1;margin:.2rem 0 0;} .hu-title em{font-style:italic;color:var(--teal);} .hu-sub{font-style:italic;color:var(--ink-soft);margin:.35rem 0 1rem;font-size:1.05rem;} .hu-mode{display:inline-block;font-size:.72rem;letter-spacing:.12em;text-transform:uppercase;color:var(--teal);border:1px solid var(--line);border-radius:999px;padding:.15rem .6rem;} .hu-step{font-family:'Fraunces',serif;font-weight:600;color:var(--teal);margin:14px 0 4px;font-size:1.05rem;} .sum{background:#fbfdfb;border:1px solid var(--line);border-left:3px solid var(--teal);border-radius:10px;padding:12px 14px;margin-top:6px;} .sum.empty{color:var(--ink-soft);font-style:italic;border-left-color:var(--line);} .sum-h{font-family:'Fraunces',serif;font-weight:600;color:var(--ink);margin-bottom:6px;} .chips{display:flex;flex-wrap:wrap;gap:6px;} .chip{font-size:.78rem;letter-spacing:.04em;background:#e3ebe5;color:var(--teal);border:1px solid var(--line);border-radius:999px;padding:.12rem .6rem;} .sum details{margin-top:10px;} .sum summary{cursor:pointer;color:var(--amber);font-size:.9rem;} .map{width:100%;border-collapse:collapse;margin-top:8px;font-size:.9rem;} .map th{text-align:left;color:var(--ink-soft);font-weight:400;border-bottom:1px solid var(--line);padding:4px 6px;} .map td{border-bottom:1px solid var(--line);padding:4px 6px;vertical-align:top;} .map code{background:#eef3ef;border-radius:4px;padding:.05rem .3rem;color:var(--teal);} .sum-note{margin-top:8px;color:var(--ink-soft);font-size:.85rem;font-style:italic;} .hu-foot{color:var(--ink-soft);font-size:.82rem;font-style:italic;text-align:center;margin-top:14px;} footer{display:none !important;} """ _bk = {"title": "Hush"} if _BLOCKS_HAS_CSS: _bk["css"] = CSS _bk["theme"] = gr.themes.Soft() with gr.Blocks(**_bk) as demo: state = gr.State({}) if MODE == "model": public = "MiniCPM5-1B · on-device + rules" else: public = "Hush · rules engine" mode_label = f"{public} · [{MODE}]" if DEBUG else public gr.HTML(f"""
Hush
A local privacy airlock. Scrub secrets out before you paste into cloud AI; pour them back in after.
{mode_label}
""") gr.HTML('
1 · Scrub your text
') src = gr.Textbox(placeholder="Paste the text you want to share... (names, emails, phone numbers, addresses, card and ID numbers will be hidden)", lines=7, show_label=False) scrub = gr.Button("Hush it", variant="primary") out = gr.Textbox(label="Safe to share", lines=7, interactive=True, **_COPY) summary = gr.HTML(render_summary({})) gr.HTML('
2 · Bring the reply back
') reply = gr.Textbox(placeholder="Paste the AI's reply here (it will still contain the [[PLACEHOLDERS]])...", lines=5, show_label=False) unscrub = gr.Button("Restore my details", variant="primary") restored = gr.Textbox(label="Your reply, with real details restored", lines=6, interactive=True, **_COPY) gr.HTML('
The key that maps placeholders to your details stays in ' 'this browser session and is never stored. Run Hush locally and nothing leaves your machine.
') scrub.click(on_scrub, [src, state], [out, summary, state]) unscrub.click(on_restore, [reply, state], restored) if __name__ == "__main__": _lk = {} if not _BLOCKS_HAS_CSS: _lk["css"] = CSS _lk["theme"] = gr.themes.Soft() if _LAUNCH_HAS_SSR: _lk["ssr_mode"] = False demo.queue(max_size=24).launch(**_lk)