"""Detect PHI spans in free text with a token-classification model and mask them."""

from functools import lru_cache

from transformers import pipeline

MODEL_ID = "bharathjanumpally/phi-span-detector-deberta-v3"


@lru_cache(maxsize=None)
def _load_pipeline(model_id: str):
    """Build the token-classification pipeline for *model_id* once and reuse it.

    Keyed on the model id so that reassigning ``MODEL_ID`` still takes effect,
    while repeated calls with the same id do not reload the model.
    """
    return pipeline(
        "token-classification",
        model=model_id,
        aggregation_strategy="simple",
    )


def redact_text(text: str) -> tuple[list[dict], str]:
    """Replace every detected span in *text* with a ``[LABEL]`` placeholder.

    Args:
        text: The string to scan.

    Returns:
        A ``(spans, redacted)`` pair: the span dicts exactly as returned by
        the pipeline, and a copy of *text* in which each span's
        ``start``..``end`` slice is replaced by ``[<entity_group>]``.
    """
    if not text:
        # Nothing to redact; avoid loading the model for an empty input.
        return [], text

    spans = _load_pipeline(MODEL_ID)(text)

    # Assemble the output left to right from slices of the ORIGINAL text, so
    # offsets never need adjusting and overlapping spans cannot cut into a
    # placeholder that was already inserted.
    pieces: list[str] = []
    cursor = 0
    for item in sorted(spans, key=lambda s: (s["start"], s["end"])):
        start, end = item["start"], item["end"]
        if start < cursor and end <= cursor:
            # Entirely inside a region that has already been masked.
            continue
        pieces.append(text[cursor:max(start, cursor)])
        pieces.append(f"[{item['entity_group']}]")
        cursor = max(end, cursor)
    pieces.append(text[cursor:])

    return spans, "".join(pieces)


def main() -> None:
    """Run the redactor on a sample sentence and print the spans and result."""
    sample = (
        "Patient John Smith (MRN: 001-23-4567) visited "
        "Boston Medical Center on 12/19/2025."
    )
    spans, redacted = redact_text(sample)

    print("Spans:")
    for span in spans:
        print(span)

    print()
    print("Redacted:")
    print(redacted)


if __name__ == "__main__":
    main()