Spaces:
Build error
Build error
| # Importing as module. | |
| import streamlit as st | |
| import en_pipeline | |
| from presidio_anonymizer import AnonymizerEngine | |
| from presidio_analyzer import AnalyzerEngine, EntityRecognizer, RecognizerResult, Pattern, PatternRecognizer, AnalysisExplanation | |
| from presidio_analyzer.nlp_engine import NlpArtifacts,NlpEngineProvider | |
| from presidio_analyzer import AnalyzerEngine | |
| from presidio_analyzer.nlp_engine import NlpEngineProvider | |
| from presidio_analyzer.recognizer_registry import RecognizerRegistry | |
| from presidio_analyzer.predefined_recognizers import SpacyRecognizer | |
| import spacy | |
| from spacy import displacy | |
# Load the packaged spaCy pipeline once at import time; it is used further
# down to build the document for the displaCy entity visualisation.
nlp = en_pipeline.load()

# Entity types requested from the Presidio analyzer.
supported_entities = [
    "CREDIT_CARD",
    "DATE_TIME",
    "EMAIL_ADDRESS",
    "IBAN_CODE",
    "IP_ADDRESS",
    "NRP",
    "LOCATION",
    "PERSON",
    "PHONE_NUMBER",
    "URL",
    "US_BANK_NUMBER",
    "US_DRIVER_LICENSE",
    "US_PASSPORT",
    "US_SSN",
    "US_ITIN",
]
@st.cache_resource
def get_analyzer():
    """Build the Presidio ``AnalyzerEngine`` used by the app.

    The engine is created with Presidio's default configuration and then
    gets one additional ``SpacyRecognizer`` limited to the entity types
    listed in this function.

    The result is cached with ``st.cache_resource`` so the engine is built
    once instead of on every Streamlit script rerun.

    Returns:
        AnalyzerEngine: the configured analyzer.
    """
    # https://microsoft.github.io/presidio/supported_entities/#list-of-supported-entities
    supported_entities = [
        "CREDIT_CARD",
        "DATE_TIME",
        "EMAIL_ADDRESS",
        "IBAN_CODE",
        "IP_ADDRESS",
        "NRP",
        "LOCATION",
        "PERSON",
        "PHONE_NUMBER",
        "URL",
        "US_BANK_NUMBER",
        "US_DRIVER_LICENSE",
        "US_PASSPORT",
        "US_SSN",
        "US_ITIN",
    ]

    # Use Presidio's default recognizer rules.
    analyzer = AnalyzerEngine()

    # Alternative kept for reference: build the analyzer on an explicitly
    # configured spaCy NLP engine instead of the default engine.
    #
    # config = {
    #     'nlp_engine_name': 'spacy',
    #     'models': [
    #         {
    #             'lang_code': 'en',
    #             'model_name': 'en_core_web_sm'
    #         },
    #     ],
    #     'ner_model_configuration': {
    #         'labels_to_ignore': ['O'],
    #         'model_to_presidio_entity_mapping': {
    #             'PER': 'PERSON',
    #             'LOC': 'LOCATION',
    #             'DATE': 'DATE_TIME',
    #             'GPE': 'LOCATION',
    #             'PERSON': 'PERSON',
    #             'TIME': 'DATE_TIME',
    #         },
    #         # 'low_confidence_score_multiplier': 0.4,
    #         # 'low_score_entity_names': ['ID', 'ORG']
    #     }
    # }
    # # Initialize the NLP engine from the configuration
    # provider = NlpEngineProvider(nlp_configuration=config)
    # nlp_engine = provider.create_engine()
    # # Create the recognizer registry
    # registry = RecognizerRegistry()
    # registry.load_predefined_recognizers()
    # # Pass the created NLP engine and supported languages to the AnalyzerEngine
    # analyzer = AnalyzerEngine(
    #     nlp_engine=nlp_engine,
    #     supported_languages=["en"],
    #     registry=registry
    # )

    # The first positional parameter of SpacyRecognizer is
    # ``supported_language`` (a language code), not a spaCy pipeline.
    # Passing the loaded pipeline there registered the recognizer under a
    # non-string language, so it was never selected for language="en".
    # The language is therefore given explicitly by keyword.
    #
    # NOTE(review): SpacyRecognizer reads entities from the analyzer's NLP
    # engine; it does not run a model of its own. To have Presidio detect
    # entities with the custom "en_pipeline" model, the NLP engine most
    # likely has to be configured with that model (see the commented block
    # above) -- confirm the intended behaviour.
    spacy_recognizer = SpacyRecognizer(
        supported_language="en",
        supported_entities=supported_entities,
    )
    analyzer.registry.add_recognizer(spacy_recognizer)
    return analyzer
analyzer = get_analyzer()

# Display a section header:
st.header("PII-Redaction")

# Text shown in the input box by default.
default_value = '''While traveling through New York, Emily received an email from her bank at emily.brown@example.com informing her about a suspicious activity on her credit card number 4532-8291-1283-9427. She quickly noted down the IP address 192.168.1.1 from which the transaction was attempted. Concerned, she called her bank at (800) 555-1234 and provided her US bank account number 1234567890123456 to verify her identity.
Earlier that day, on July 18, 2024, at 3 PM, Emily had also received an important document via email from her financial advisor, john.doe@finance.com, regarding her upcoming trip to Paris, France. The document included her US passport number 123456789 and instructions for her to keep a copy of her US driver license number A1234567 for identification purposes during her travels.
she enabled two-factor authentication on all her accounts and noted down her backup email, emily.backup@example.org, in case she needed to recover any information.'''

# NOTE(review): st.text_input is a single-line widget while the default text
# spans several paragraphs; st.text_area may be a better fit -- confirm.
input_text = st.text_input("Enter your text...", default_value)
st.divider()

# Detect the supported entity types in the entered text.
analyzer_results = analyzer.analyze(
    text=input_text,
    entities=supported_entities,
    language="en",
    return_decision_process=True,
)

# Text Anonymizer
engine = AnonymizerEngine()
result = engine.anonymize(text=input_text, analyzer_results=analyzer_results)

# Restructuring anonymizer results
anonymization_results = {
    "anonymized": result.text,
    "found": [entity.to_dict() for entity in analyzer_results],
}
anonym = anonymization_results["anonymized"]

# spaCy document used for the displaCy entity rendering below.
doc = nlp(input_text)

# # Display a section header:
# st.header("Dependency visualizer")
# # style="dep" indicates dependencies should be generated.
# dep_svg = displacy.render(doc, style="dep", jupyter=False)
# st.image(dep_svg, width=400, use_column_width="never")

col1, col2 = st.columns(2)

with col1:
    # Add a section header:
    st.header("Entity visualizer")
    # Take the text from the input field and render the entity html.
    # Note that style="ent" indicates entities.
    ent_html = displacy.render(doc, style="ent", jupyter=False)
    # displaCy returns HTML markup, so raw HTML rendering is required here.
    st.markdown(ent_html, unsafe_allow_html=True)

with col2:
    # Add a section header:
    st.header("Entity Anonymizer")
    # The anonymized text is plain text, not HTML. Presidio's default
    # operator replaces entities with placeholders such as <PERSON>; with
    # unsafe_allow_html=True those were parsed as HTML tags and vanished
    # from the page, and user-entered text was injected as raw HTML.
    # Rendering without the flag escapes tags so placeholders stay visible.
    st.markdown(anonym)