# Source: phi-span-detector-deberta-v3 / example_redaction.py
# Committed by: bharathjanumpally
# Commit: "Improve model card, metadata, metrics, and usage example" (f618ccc, verified)
from functools import lru_cache

from transformers import pipeline
MODEL_ID = "bharathjanumpally/phi-span-detector-deberta-v3"
@lru_cache(maxsize=1)
def _load_ner_pipeline(model_id: str):
    """Build the token-classification pipeline for *model_id* once and reuse it."""
    return pipeline(
        "token-classification",
        model=model_id,
        aggregation_strategy="simple",
    )


def redact_text(text: str) -> tuple[list[dict], str]:
    """Detect spans in *text* and replace each one with a ``[LABEL]`` placeholder.

    The text is run through a ``token-classification`` pipeline for
    ``MODEL_ID`` (configured with ``aggregation_strategy="simple"``). Every
    returned span is replaced by its ``entity_group`` wrapped in square
    brackets.

    Args:
        text: The raw input string to scan and redact.

    Returns:
        A ``(spans, redacted)`` tuple: the span dicts exactly as returned by
        the pipeline, and a copy of *text* with each span replaced.

    Raises:
        ValueError: If a span carries no ``start``/``end`` character offsets,
            in which case it cannot be located in *text*.
    """
    # Nothing to scan; skip loading the model entirely.
    if not text:
        return [], text

    # Loading the model is expensive, so the pipeline is cached across calls.
    ner = _load_ner_pipeline(MODEL_ID)
    spans = ner(text)

    # Fail loudly rather than emit text that only looks redacted.
    for item in spans:
        if item.get("start") is None or item.get("end") is None:
            raise ValueError(
                "Span has no character offsets; cannot redact: "
                f"{item.get('entity_group')!r}"
            )

    # Single left-to-right pass: copy the untouched gaps and emit one
    # placeholder per span, then join once instead of re-slicing the whole
    # string for every span.
    pieces: list[str] = []
    cursor = 0
    for item in sorted(spans, key=lambda s: (s["start"], s["end"])):
        start, end = item["start"], item["end"]
        if start >= cursor:
            pieces.append(text[cursor:start])
            pieces.append(f"[{item['entity_group']}]")
        # Otherwise the span overlaps the previous one: the earlier
        # placeholder already stands in for it, so only extend the covered
        # region below.
        cursor = max(cursor, end)
    pieces.append(text[cursor:])

    return spans, "".join(pieces)
if __name__ == "__main__":
    # Demo: run the redactor over one synthetic clinical sentence and show
    # both the detected spans and the resulting redacted text.
    demo_note = (
        "Patient John Smith (MRN: 001-23-4567) visited "
        "Boston Medical Center on 12/19/2025."
    )
    detected, masked = redact_text(demo_note)

    # One span dict per line, in the order they were returned.
    print("Spans:")
    for entry in detected:
        print(entry)

    # Blank separator line, then the redacted sentence.
    print()
    print("Redacted:")
    print(masked)