Spaces:
Sleeping
Sleeping
JDFPalladium
cleaning up organization of scripts and data and updating filepaths in app to processed data
389c5f0 | from pathlib import Path | |
| import re | |
| from .helpers import dateparser_detect, describe_relative_date | |
def load_kenyan_names(filepath="data/processed/kenyan_names.txt"):
    """Read a newline-separated name list into a set of lowercase strings.

    Args:
        filepath: Location of the UTF-8 text file, one name per line. The
            default is a relative path, so it is resolved against the
            current working directory.

    Returns:
        A set containing every non-blank line, stripped of surrounding
        whitespace and lowercased. If the file does not exist, an empty
        set is returned instead of raising.
    """
    source = Path(filepath)
    if source.exists():
        names = set()
        with source.open("r", encoding="utf-8") as handle:
            for raw_line in handle:
                entry = raw_line.strip()
                # Blank / whitespace-only lines carry no name.
                if entry:
                    names.add(entry.lower())
        return names
    return set()
# Lowercased name lookup set, built once when the module is loaded using the
# default path of load_kenyan_names(). If that file does not exist the set is
# empty, in which case name_list_detect() will never report a match.
kenyan_names = load_kenyan_names()
def name_list_detect(text_names):
    """Return the words in *text_names* that appear in ``kenyan_names``.

    The text is split into ``\\w+`` tokens; a token counts as a hit when its
    lowercase form is in the module-level ``kenyan_names`` set.

    Args:
        text_names: Free text to scan.

    Returns:
        A list of matching tokens in order of appearance, with their original
        casing. Repeated occurrences are reported repeatedly.
    """
    hits = []
    for token in re.finditer(r"\b\w+\b", text_names):
        word = token.group(0)
        # The lookup set holds lowercase entries, so compare case-insensitively.
        if word.lower() in kenyan_names:
            hits.append(word)
    return hits
def _redact_dates(text, dates_found):
    """Replace each detected date string in *text* with its relative description."""
    replacements = {}
    for match, dt in dates_found:
        # Skip empty match strings (replacing "" would hit every position) and
        # keep the first description when the same string is reported twice.
        if match and match not in replacements:
            replacements[match] = describe_relative_date(dt)
    if not replacements:
        return text
    # One left-to-right pass with the longest alternatives first: a match such
    # as "15 May" is consumed whole instead of being corrupted by a shorter
    # match ("5 May") contained in it, and inserted descriptions are never
    # scanned again.
    ordered = sorted(replacements, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(item) for item in ordered))
    return pattern.sub(lambda hit: replacements[hit.group(0)], text)


def _redact_names(text, names_found):
    """Replace whole-word, case-insensitive occurrences of the names with ``[name]``."""
    unique = {}
    for name in names_found:
        if name:
            # Matching is case-insensitive, so "John" and "john" need one entry.
            unique.setdefault(name.lower(), name)
    if not unique:
        return text
    alternation = "|".join(re.escape(name) for name in unique.values())
    # A single pass means the literal "[name]" placeholder inserted for one
    # hit can never be re-matched by a later name pattern.
    pattern = re.compile(rf"\b(?:{alternation})\b", re.IGNORECASE)
    return pattern.sub("[name]", text)


def detect_and_redact_phi(text_input):
    """Detect names and dates in *text_input* and return a redacted copy.

    Dates are replaced first (with the text returned by
    ``describe_relative_date``), then names are replaced with ``[name]``.
    Names are detected on the original text, before any replacement.

    Args:
        text_input: Free text to scan.

    Returns:
        A dict with the keys:
            ``phi_detected``: True when at least one name or date was found.
            ``kenyan_name_matches``: list returned by ``name_list_detect``.
            ``dates``: the matched date strings, in detection order.
            ``redacted_text``: *text_input* with dates and names replaced.
    """
    names_found = name_list_detect(text_input)
    # dateparser_detect lives in .helpers and is not visible here; the code
    # below needs (matched_text, datetime) pairs. Guard against a None result,
    # which dateparser's search_dates produces when nothing is found.
    # NOTE(review): confirm the helper's return contract.
    dates_found = list(dateparser_detect(text_input) or [])
    phi_detected = bool(names_found or dates_found)

    redacted = _redact_dates(text_input, dates_found)
    redacted = _redact_names(redacted, names_found)

    return {
        "phi_detected": phi_detected,
        "kenyan_name_matches": names_found,
        "dates": [found[0] for found in dates_found],
        "redacted_text": redacted,
    }
if __name__ == "__main__":
    # Interactive loop: read text, report any detected PHI, show the redacted
    # version, and repeat until the user enters "q" (any casing).
    print("\n🔍 PHI Detection Tool (Kenyan context + redaction with relative dates)\n")
    while True:
        entry = input("Enter clinical text (or 'q' to quit):\n> ")
        if entry.lower() == "q":
            break

        report = detect_and_redact_phi(entry)
        if not report["phi_detected"]:
            print("\n✅ No PHI detected.")
        else:
            print("\n⚠️ Possible PHI detected!")
            name_hits = report["kenyan_name_matches"]
            date_hits = report["dates"]
            if name_hits:
                print(" - Possible Kenyan names:", name_hits)
            if date_hits:
                print(" - Dates detected:", date_hits)
            print("\n🛡️ Redacted text:")
            print(report["redacted_text"])

        # Visual separator between consecutive entries.
        print("\n---\n")