Regulatory_ai / app.py
AndaiMD's picture
gradio app
7f0a9f7
# regulatory_ai_gradio.py
import re
import gradio as gr
# Optional Hugging Face NER. The module must stay importable when
# `transformers` (or its default NER model) is unavailable; in that case
# HF_AVAILABLE is False and ner_pipeline is None.
try:
    from transformers import pipeline

    # `grouped_entities=True` is deprecated in transformers;
    # `aggregation_strategy="simple"` is its documented replacement and
    # returns the same grouped dicts (entity_group / word / start / end / score).
    ner_pipeline = pipeline("ner", aggregation_strategy="simple")
    HF_AVAILABLE = True
except Exception:
    # Deliberately broad: a missing package, a missing backend or a failed
    # model download should all degrade to "NER disabled", not crash the app.
    ner_pipeline = None
    HF_AVAILABLE = False
# Hospital gazetteer: names looked up verbatim (case-sensitive) in the input
# text and reported under the "ORG" label.
KENYAN_HOSPITALS = [
    "Kenyatta National Hospital",
    "Moi Teaching and Referral Hospital",
    "Aga Khan University Hospital",
]
# County gazetteer: names looked up verbatim (case-sensitive) in the input
# text and reported under the "LOC" label.
KENYAN_COUNTIES = [
    "Nairobi",
    "Kisumu",
    "Mombasa",
    "Nakuru",
]
# Label -> regular expression used to spot structured identifiers.
REGEX_PATTERNS = {
    # local-part @ domain . alphabetic TLD of two or more letters
    "EMAIL": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
    # "+254" or a leading "0", then 7 or 1, then eight more digits
    "PHONE": r"(\+254|0)(7|1)\d{8}",
    # literal "NHIF", optional "No."/"Number", then a run of digits
    "NHIF": r"NHIF\s?(No\.|Number)?\s?\d+",
    # stand-alone run of 7 or 8 digits
    "NAT_ID": r"\b\d{7,8}\b",
    # "NCT" followed by exactly eight digits
    "NCT_ID": r"NCT\d{8}",
    # numeric date with "/" or "-" separators and a 2-4 digit last field
    "DATE": r"\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b",
}
# Label -> (action, advice). The action is one of "redact", "mask",
# "generalize" or "keep"; the advice is the human-readable note shown
# alongside each detected entity.
COMPLIANCE_RULES = {
    # Direct identifiers
    "EMAIL": ("redact", "Direct contact identifier. Remove."),
    "PHONE": ("redact", "Direct contact identifier. Remove."),
    # Numeric identifiers
    "NHIF": ("mask", "NHIF number is sensitive. Mask or pseudonymize."),
    "NAT_ID": ("mask", "National ID is sensitive. Mask or pseudonymize."),
    "NCT_ID": ("keep", "Public trial identifier. Keep unless policy dictates masking."),
    # Quasi-identifiers
    "DATE": ("generalize", "Generalize dates (e.g., keep only year)."),
    "ORG": ("generalize", "Institution names may indirectly identify. Generalize."),
    "LOC": ("generalize", "Geographic info may re-identify. Generalize."),
    # Person names
    "PER": ("redact", "Personal names are identifiers. Remove."),
}
# Color name assigned to each entity label (same label set as COMPLIANCE_RULES).
ENTITY_COLORS = {
    "EMAIL": "red",
    "PHONE": "orange",
    "NHIF": "purple",
    "NAT_ID": "purple",
    "NCT_ID": "green",
    "DATE": "blue",
    "ORG": "teal",
    "LOC": "brown",
    "PER": "pink",
}
def _collect_findings(text, use_ner):
    """Run every detector over *text* and return the raw list of findings."""
    findings = []

    # Regex detections.
    for label, pattern in REGEX_PATTERNS.items():
        for match in re.finditer(pattern, text):
            findings.append({
                "label": label,
                "text": match.group(),
                "span": match.span(),
                "source": "regex",
            })

    # Gazetteer detections: report EVERY occurrence of a name, not only the
    # first one, so repeated mentions are sanitized as well.
    for label, names in (("ORG", KENYAN_HOSPITALS), ("LOC", KENYAN_COUNTIES)):
        for name in names:
            for match in re.finditer(re.escape(name), text):
                findings.append({
                    "label": label,
                    "text": name,
                    "span": match.span(),
                    "source": "gazetteer",
                })

    # Hugging Face NER detections (only when requested and loaded).
    if use_ner and HF_AVAILABLE:
        for ent in ner_pipeline(text):
            start, end = ent.get("start"), ent.get("end")
            if start is None or end is None:
                # Without character offsets the entity cannot be replaced.
                continue
            findings.append({
                "label": ent["entity_group"],
                "text": ent["word"],
                "span": (start, end),
                "score": ent["score"],
                "source": "hf_ner",
            })

    return findings


def _select_non_overlapping(findings):
    """Return findings in document order with overlapping spans removed.

    Where spans overlap, the earliest-starting one wins; among findings that
    start at the same offset the longest wins, and exact duplicates keep the
    first detector's finding (the sort is stable).
    """
    ordered = sorted(
        findings,
        key=lambda f: (f["span"][0], f["span"][0] - f["span"][1]),
    )
    selected = []
    covered_until = 0
    for finding in ordered:
        start, end = finding["span"]
        if start >= covered_until:
            selected.append(finding)
            covered_until = end
    return selected


def _replacement_for(label, entity_text, action):
    """Build the placeholder that replaces *entity_text* for *action*."""
    if action == "redact":
        return f"[REDACTED {label}]"
    if action == "mask":
        # Keep only the last two characters visible.
        return f"[MASKED: {'*' * (len(entity_text) - 2)}{entity_text[-2:]}]"
    if action == "generalize":
        return f"[{label} (GENERALIZED)]"
    # "keep" (or any unknown action): leave the entity as it is.
    return entity_text


def analyze_text(text, use_ner=False):
    """Detect sensitive entities in *text*, sanitize them and explain why.

    Detectors are the REGEX_PATTERNS, the hospital/county gazetteers and,
    when *use_ner* is true and the pipeline loaded, Hugging Face NER.
    Findings whose label has no COMPLIANCE_RULES entry are ignored.
    Overlapping findings (for example the NAT_ID digits inside an NHIF match,
    or the same span reported by two detectors) are reduced to a single
    finding so every character range is replaced at most once; replacing
    overlapping spans one after another would corrupt the output.

    Args:
        text: Input text; ``None`` is treated as an empty string.
        use_ner: Whether to add Hugging Face NER detections.

    Returns:
        A ``(sanitized, highlights, notes)`` tuple: the sanitized text, a
        list of ``(entity_text, label)`` pairs, and the compliance notes as
        one newline-joined string (or "No sensitive entities found.").
        Highlights and notes are listed from the last entity in the text to
        the first, i.e. in the order the replacements are applied.
    """
    text = text or ""

    actionable = [
        finding
        for finding in _collect_findings(text, use_ner)
        if finding["label"] in COMPLIANCE_RULES
    ]

    sanitized = text
    notes = []
    highlights = []
    # Replace from the end of the text backwards so that the offsets of the
    # findings still to be processed remain valid.
    for finding in reversed(_select_non_overlapping(actionable)):
        label = finding["label"]
        entity = finding["text"]
        action, advice = COMPLIANCE_RULES[label]
        start, end = finding["span"]
        sanitized = (
            sanitized[:start]
            + _replacement_for(label, entity, action)
            + sanitized[end:]
        )
        notes.append(f'- [{label}] "{entity}" → {action.upper()} | {advice}')
        highlights.append((entity, label))

    notes_text = "\n".join(notes) if notes else "No sensitive entities found."
    return sanitized, highlights, notes_text
# --- Gradio UI ---
# Single-column layout: input, NER toggle, button, then the three outputs
# produced by analyze_text (sanitized text, highlighted entities, notes).
with gr.Blocks() as demo:
    gr.Markdown(
        "## 🏥 Regulatory AI Demo (Kenya Context)\n"
        "Paste healthcare text to sanitize and get compliance notes."
    )
    with gr.Row():
        input_text = gr.Textbox(lines=8, label="Input Text")
    with gr.Row():
        use_ner = gr.Checkbox(label="Use Hugging Face NER (if available)", value=False)
    with gr.Row():
        btn = gr.Button("Analyze")
    with gr.Row():
        sanitized_output = gr.Textbox(lines=8, label="Sanitized Text")
    with gr.Row():
        # ENTITY_COLORS was defined but never used; pass it as the color map
        # so each label is rendered in its configured color.
        highlighted_output = gr.HighlightedText(
            label="Detected Entities (Highlighted)",
            color_map=ENTITY_COLORS,
        )
    with gr.Row():
        notes_output = gr.Textbox(lines=8, label="Compliance Notes")
    # Output order must match the tuple returned by analyze_text.
    btn.click(
        analyze_text,
        inputs=[input_text, use_ner],
        outputs=[sanitized_output, highlighted_output, notes_output],
    )

if __name__ == "__main__":
    demo.launch()