barath19 OpenAI Codex committed on
Commit
fa40826
·
1 Parent(s): 2414447

fix: graceful regex-only fallback when GLiNER unavailable

Browse files

Prevent a hard crash (ModuleNotFoundError: gliner) when GLiNER isn't
installed locally, and reduce regex false positives.

- redac/detect.py: add gliner_available(); skip the GLiNER path and run
regex-only when GLiNER can't be imported. The phone recognizer still ignores
dates and now also skips matches containing fewer than 7 digits (e.g. serial
numbers).
- app.py: show a warning banner when running in regex-only mode.

On the Space, GLiNER is installed so full NER runs; locally the app no
longer errors.

Co-authored-by: OpenAI Codex <codex@openai.com>

Files changed (2) hide show
  1. app.py +9 -1
  2. redac/detect.py +19 -4
app.py CHANGED
@@ -19,6 +19,14 @@ from redac import (
19
  extract_text_from_image,
20
  DEFAULT_LABELS,
21
  )
 
 
 
 
 
 
 
 
22
 
23
  EXAMPLE = (
24
  "Patient John A. Doe, DOB 1985-04-12, was admitted on 2026-06-01. "
@@ -57,7 +65,7 @@ with gr.Blocks(title="Redac") as demo:
57
  gr.Markdown(
58
  "# 🖍️ Redac\n"
59
  "**A local privacy gateway.** Detect and redact PII *before* it reaches "
60
- "a downstream model. Reversible: the mapping stays local."
61
  )
62
 
63
  with gr.Tabs():
 
19
  extract_text_from_image,
20
  DEFAULT_LABELS,
21
  )
22
+ from redac.detect import gliner_available
23
+
24
+ _NER_NOTE = (
25
+ "" if gliner_available()
26
+ else "\n\n> ⚠️ GLiNER not installed: running **regex-only** (structured "
27
+ "IDs like email/phone/IBAN). Names, addresses and other free-text PII "
28
+ "need GLiNER, which is available on the Space."
29
+ )
30
 
31
  EXAMPLE = (
32
  "Patient John A. Doe, DOB 1985-04-12, was admitted on 2026-06-01. "
 
65
  gr.Markdown(
66
  "# 🖍️ Redac\n"
67
  "**A local privacy gateway.** Detect and redact PII *before* it reaches "
68
+ "a downstream model. Reversible: the mapping stays local." + _NER_NOTE
69
  )
70
 
71
  with gr.Tabs():
redac/detect.py CHANGED
@@ -71,9 +71,12 @@ def _regex_entities(text: str) -> List[Entity]:
71
  span = m.group().strip()
72
  if len(span) < 4:
73
  continue
74
- # Don't let the phone recognizer misfire on dates.
75
- if label == "phone number" and _DATE_RE.match(span):
76
- continue
 
 
 
77
  out.append(
78
  Entity(
79
  start=m.start(),
@@ -89,6 +92,18 @@ def _regex_entities(text: str) -> List[Entity]:
89
 
90
  # --- GLiNER ------------------------------------------------------------------
91
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  @lru_cache(maxsize=1)
93
  def _load_gliner():
94
  from gliner import GLiNER
@@ -137,6 +152,6 @@ def detect_entities(
137
  return []
138
  labels = labels or DEFAULT_LABELS
139
  found: List[Entity] = _regex_entities(text)
140
- if use_gliner:
141
  found.extend(_gliner_entities(text, labels, threshold))
142
  return _resolve_overlaps(found)
 
71
  span = m.group().strip()
72
  if len(span) < 4:
73
  continue
74
+ # Phone recognizer: ignore dates and short digit runs (serials).
75
+ if label == "phone number":
76
+ if _DATE_RE.match(span):
77
+ continue
78
+ if sum(c.isdigit() for c in span) < 7:
79
+ continue
80
  out.append(
81
  Entity(
82
  start=m.start(),
 
92
 
93
  # --- GLiNER ------------------------------------------------------------------
94
 
95
@lru_cache(maxsize=1)
def gliner_available() -> bool:
    """Return True if the ``gliner`` package can be imported.

    Lets callers fall back to regex-only detection (and tell the user)
    instead of crashing when GLiNER isn't usable. The result is cached,
    so the import is attempted at most once per process.

    Returns:
        True when ``import gliner`` succeeds, False otherwise.
    """
    try:
        import gliner  # noqa: F401
    except ImportError:
        # Expected case: GLiNER simply isn't installed.
        return False
    except Exception:
        # The package is present but failed while importing. Keep the
        # best-effort fallback, but leave a trace so a broken install is
        # not mistaken for a missing one.
        import logging

        logging.getLogger(__name__).warning(
            "GLiNER failed to import; falling back to regex-only detection",
            exc_info=True,
        )
        return False
    return True
105
+
106
+
107
  @lru_cache(maxsize=1)
108
  def _load_gliner():
109
  from gliner import GLiNER
 
152
  return []
153
  labels = labels or DEFAULT_LABELS
154
  found: List[Entity] = _regex_entities(text)
155
+ if use_gliner and gliner_available():
156
  found.extend(_gliner_entities(text, labels, threshold))
157
  return _resolve_overlaps(found)