Spaces:
Build error
Build error
File size: 5,473 Bytes
# Importing as module.
import streamlit as st
import en_pipeline
from presidio_anonymizer import AnonymizerEngine
from presidio_analyzer import AnalyzerEngine, EntityRecognizer, RecognizerResult, Pattern, PatternRecognizer, AnalysisExplanation
from presidio_analyzer.nlp_engine import NlpArtifacts,NlpEngineProvider
from presidio_analyzer import AnalyzerEngine
from presidio_analyzer.nlp_engine import NlpEngineProvider
from presidio_analyzer.recognizer_registry import RecognizerRegistry
from presidio_analyzer.predefined_recognizers import SpacyRecognizer
import spacy
from spacy import displacy
# Load the packaged spaCy pipeline once at import time; the script uses it
# further down to build the document rendered by displaCy.
nlp = en_pipeline.load()

# Presidio entity types that the app asks the analyzer to look for.
supported_entities = [
    "CREDIT_CARD",
    "DATE_TIME",
    "EMAIL_ADDRESS",
    "IBAN_CODE",
    "IP_ADDRESS",
    "NRP",
    "LOCATION",
    "PERSON",
    "PHONE_NUMBER",
    "URL",
    "US_BANK_NUMBER",
    "US_DRIVER_LICENSE",
    "US_PASSPORT",
    "US_SSN",
    "US_ITIN",
]
@st.cache_resource
def get_analyzer():
    """Build the Presidio analyzer used by the app.

    Creates an ``AnalyzerEngine`` with Presidio's default configuration and
    registers one extra ``SpacyRecognizer`` restricted to the entity types
    this app redacts. The function is wrapped in ``st.cache_resource`` so the
    engine is built once and reused instead of being rebuilt on every
    Streamlit rerun.

    Returns:
        AnalyzerEngine: the configured analyzer.
    """
    # https://microsoft.github.io/presidio/supported_entities/#list-of-supported-entities
    supported_entities = [
        "CREDIT_CARD",
        "DATE_TIME",
        "EMAIL_ADDRESS",
        "IBAN_CODE",
        "IP_ADDRESS",
        "NRP",
        "LOCATION",
        "PERSON",
        "PHONE_NUMBER",
        "URL",
        "US_BANK_NUMBER",
        "US_DRIVER_LICENSE",
        "US_PASSPORT",
        "US_SSN",
        "US_ITIN",
    ]

    # Use Presidio's default recognizer rules and default NLP engine.
    analyzer = AnalyzerEngine()

    # Alternative (disabled): build the engine from an explicit spaCy
    # configuration instead of the default engine.
    # config = {
    #     'nlp_engine_name': 'spacy',
    #     'models': [
    #         {
    #             'lang_code': 'en',
    #             'model_name': 'en_core_web_sm'
    #         },
    #     ],
    #     'ner_model_configuration': {
    #         'labels_to_ignore': ['O'],
    #         'model_to_presidio_entity_mapping': {
    #             'PER': 'PERSON',
    #             'LOC': 'LOCATION',
    #             'DATE': 'DATE_TIME',
    #             'GPE': 'LOCATION',
    #             'PERSON': 'PERSON',
    #             'TIME': 'DATE_TIME',
    #         },
    #         # 'low_confidence_score_multiplier': 0.4,
    #         # 'low_score_entity_names': ['ID', 'ORG']
    #     }
    # }
    # # Initialize the NLP engine from the configuration
    # provider = NlpEngineProvider(nlp_configuration=config)
    # nlp_engine = provider.create_engine()
    # # Create the recognizer registry
    # registry = RecognizerRegistry()
    # registry.load_predefined_recognizers()
    # # Pass the created NLP engine and supported languages to the AnalyzerEngine
    # analyzer = AnalyzerEngine(
    #     nlp_engine=nlp_engine,
    #     supported_languages=["en"],
    #     registry=registry
    # )

    # The approach below is useful when the model's labels already match the
    # Presidio entity names.
    #
    # SpacyRecognizer's first positional parameter is ``supported_language``,
    # not a spaCy model, so it is configured by keyword here. Passing a loaded
    # pipeline positionally would set the recognizer's language to a
    # non-string object and the registry would never select it for "en".
    # No model is loaded in this function because the recognizer cannot take
    # one.
    # NOTE(review): the recognizer appears to read entities from the NLP
    # artifacts produced by the AnalyzerEngine's own NLP engine. To make
    # Presidio detect with the custom ``en_pipeline`` model, wire it in through
    # NlpEngineProvider as sketched above - TODO confirm the label mapping.
    spacy_recognizer = SpacyRecognizer(
        supported_language="en",
        supported_entities=supported_entities,
    )
    analyzer.registry.add_recognizer(spacy_recognizer)
    return analyzer
# Obtain the analyzer built by get_analyzer().
analyzer = get_analyzer()

# Page title.
st.header("PII-Redaction")

# Sample text shown in the input box by default.
default_value = '''While traveling through New York, Emily received an email from her bank at emily.brown@example.com informing her about a suspicious activity on her credit card number 4532-8291-1283-9427. She quickly noted down the IP address 192.168.1.1 from which the transaction was attempted. Concerned, she called her bank at (800) 555-1234 and provided her US bank account number 1234567890123456 to verify her identity.
Earlier that day, on July 18, 2024, at 3 PM, Emily had also received an important document via email from her financial advisor, john.doe@finance.com, regarding her upcoming trip to Paris, France. The document included her US passport number 123456789 and instructions for her to keep a copy of her US driver license number A1234567 for identification purposes during her travels.
she enabled two-factor authentication on all her accounts and noted down her backup email, emily.backup@example.org, in case she needed to recover any information.'''
input_text = st.text_input("Enter your text...", default_value)
st.divider()

# Detect PII spans in the entered text, restricted to the supported entities.
analyzer_results = analyzer.analyze(
    text=input_text,
    entities=supported_entities,
    language="en",
    return_decision_process=True,
)

# Text anonymizer: replace every detected span in the input text.
engine = AnonymizerEngine()
result = engine.anonymize(text=input_text, analyzer_results=analyzer_results)

# Restructure the anonymizer output together with the raw findings.
anonymization_results = {
    "anonymized": result.text,
    "found": [entity.to_dict() for entity in analyzer_results],
}
anonym = anonymization_results["anonymized"]

# Run the spaCy pipeline on the same text for the displaCy visualization.
doc = nlp(input_text)

# Dependency visualizer (disabled):
# st.header("Dependency visualizer")
# # style="dep" indicates dependencies should be generated.
# dep_svg = displacy.render(doc, style="dep", jupyter=False)
# st.image(dep_svg, width=400, use_column_width="never")

col1, col2 = st.columns(2)

with col1:
    st.header("Entity visualizer")
    # style="ent" makes displaCy render the named entities of the document.
    ent_html = displacy.render(doc, style="ent", jupyter=False)
    # displaCy returns HTML markup, so HTML rendering must be enabled here.
    st.markdown(ent_html, unsafe_allow_html=True)

with col2:
    st.header("Entity Anonymizer")
    # Show the anonymized text with Streamlit's default HTML escaping.
    # Placeholders such as <PERSON> would otherwise be parsed as unknown HTML
    # tags and vanish from the page, and user-entered text would be injected
    # into the page as raw HTML.
    st.markdown(anonym)
|