# The spaCy pipeline is installed as a Python package and imported as a module (en_pipeline).

import streamlit as st
import en_pipeline
from presidio_anonymizer import AnonymizerEngine
from presidio_analyzer import AnalyzerEngine, EntityRecognizer, RecognizerResult, Pattern, PatternRecognizer, AnalysisExplanation
from presidio_analyzer.nlp_engine import NlpArtifacts,NlpEngineProvider
from presidio_analyzer import AnalyzerEngine
from presidio_analyzer.nlp_engine import NlpEngineProvider
from presidio_analyzer.recognizer_registry import RecognizerRegistry
from presidio_analyzer.predefined_recognizers import SpacyRecognizer

import spacy
from spacy import displacy
@st.cache_resource
def _load_pipeline():
    """Load the packaged ``en_pipeline`` spaCy model.

    Wrapped in ``st.cache_resource`` so the model is loaded once and reused,
    instead of being reloaded each time Streamlit reruns this script.
    """
    return en_pipeline.load()


# spaCy pipeline used for the displaCy entity visualization.
nlp = _load_pipeline()

# Presidio entity types requested from the analyzer.
# See https://microsoft.github.io/presidio/supported_entities/
supported_entities = [
    "CREDIT_CARD",
    "DATE_TIME",
    "EMAIL_ADDRESS",
    "IBAN_CODE",
    "IP_ADDRESS",
    "NRP",
    "LOCATION",
    "PERSON",
    "PHONE_NUMBER",
    "URL",
    "US_BANK_NUMBER",
    "US_DRIVER_LICENSE",
    "US_PASSPORT",
    "US_SSN",
    "US_ITIN",
]

@st.cache_resource
def get_analyzer():
    """Build the Presidio analyzer backed by the ``en_pipeline`` spaCy model.

    The engine is cached with ``st.cache_resource`` so it is constructed once
    rather than on every Streamlit rerun.

    Returns:
        AnalyzerEngine: analyzer whose NLP engine is the ``en_pipeline`` model
        and whose spaCy recognizer reports the entity types listed in the
        module-level ``supported_entities``.
    """
    # Entity names: https://microsoft.github.io/presidio/supported_entities/

    # Run Presidio's NLP step on the custom pipeline. SpacyRecognizer does not
    # hold a model itself; it reads the entities produced by the analyzer's
    # NLP engine, so the model has to be configured here.
    nlp_configuration = {
        "nlp_engine_name": "spacy",
        "models": [{"lang_code": "en", "model_name": "en_pipeline"}],
    }
    nlp_engine = NlpEngineProvider(nlp_configuration=nlp_configuration).create_engine()

    # The default registry still provides Presidio's pattern-based recognizers.
    analyzer = AnalyzerEngine(nlp_engine=nlp_engine, supported_languages=["en"])

    # Keyword arguments are required: the first positional parameter of
    # SpacyRecognizer is ``supported_language``, so passing the pipeline
    # positionally registered a recognizer that never matched language "en".
    # NOTE(review): assumes the model's NER labels already equal the Presidio
    # entity names (as the original note stated) -- confirm against the model.
    spacy_recognizer = SpacyRecognizer(
        supported_language="en",
        supported_entities=supported_entities,
    )

    # Replace the default spaCy recognizer so NER results are not reported twice.
    analyzer.registry.remove_recognizer("SpacyRecognizer")
    analyzer.registry.add_recognizer(spacy_recognizer)

    return analyzer

analyzer = get_analyzer()

# Page title.
st.header("PII-Redaction")

# Sample text shown in the input box by default.
default_value = '''While traveling through New York, Emily received an email from her bank at emily.brown@example.com informing her about a suspicious activity on her credit card number 4532-8291-1283-9427. She quickly noted down the IP address 192.168.1.1 from which the transaction was attempted. Concerned, she called her bank at (800) 555-1234 and provided her US bank account number 1234567890123456 to verify her identity.

Earlier that day, on July 18, 2024, at 3 PM, Emily had also received an important document via email from her financial advisor, john.doe@finance.com, regarding her upcoming trip to Paris, France. The document included her US passport number 123456789 and instructions for her to keep a copy of her US driver license number A1234567 for identification purposes during her travels.

she enabled two-factor authentication on all her accounts and noted down her backup email, emily.backup@example.org, in case she needed to recover any information.'''

# A multi-line widget is used because the text spans several paragraphs;
# a single-line text input cannot display or edit the line breaks.
input_text = st.text_area("Enter your text...", default_value)

st.divider()

# Detect PII spans in the entered text.
analyzer_results = analyzer.analyze(
    text=input_text,
    entities=supported_entities,
    language="en",
    return_decision_process=True,
)

# Replace the detected spans using Presidio's default anonymization operators.
engine = AnonymizerEngine()
result = engine.anonymize(text=input_text, analyzer_results=analyzer_results)

# Bundle the anonymized text with the raw findings.
anonymization_results = {
    "anonymized": result.text,
    "found": [entity.to_dict() for entity in analyzer_results],
}
anonym = anonymization_results["anonymized"]

# spaCy document used for the entity visualization.
doc = nlp(input_text)

# # Display a section header:
# st.header("Dependency visualizer")
# # style="dep" indicates dependencies should be generated.
# dep_svg = displacy.render(doc, style="dep", jupyter=False)
# st.image(dep_svg, width=400, use_column_width="never")
# Side-by-side layout: detected entities on the left, redacted text on the right.
col1, col2 = st.columns(2)

with col1:
    st.header("Entity visualizer")
    # style="ent" makes displaCy emit highlighted-entity HTML for the document.
    ent_html = displacy.render(doc, style="ent", jupyter=False)
    # displaCy output is HTML markup, so raw HTML rendering is required here.
    st.markdown(ent_html, unsafe_allow_html=True)

with col2:
    st.header("Entity Anonymizer")
    # Rendered WITHOUT unsafe_allow_html: the anonymized text is derived from
    # user input, and Presidio's default placeholders look like "<PERSON>".
    # With raw HTML enabled those placeholders are parsed as tags instead of
    # being shown, and user-supplied markup would reach the page.
    st.markdown(anonym)