Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,47 +20,47 @@ def get_analyzer():
|
|
| 20 |
supported_entities = ["CREDIT_CARD","DATE_TIME","EMAIL_ADDRESS","IBAN_CODE","IP_ADDRESS","NRP","LOCATION","PERSON","PHONE_NUMBER","URL","US_BANK_NUMBER","US_DRIVER_LICENSE","US_PASSPORT","US_SSN","US_ITIN"]
|
| 21 |
|
| 22 |
# using presidio default recognizer rules
|
| 23 |
-
|
| 24 |
|
| 25 |
-
#uncomment below to add spacy predefined engines instead of default engine
|
| 26 |
-
config = {
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
}
|
| 49 |
-
|
| 50 |
-
# Initialize the NLP engine with the recognizer registry
|
| 51 |
-
provider = NlpEngineProvider(nlp_configuration=config)
|
| 52 |
-
nlp_engine = provider.create_engine()
|
| 53 |
|
| 54 |
-
# Create the recognizer registry
|
| 55 |
-
registry = RecognizerRegistry()
|
| 56 |
-
registry.load_predefined_recognizers()
|
| 57 |
|
| 58 |
-
# Pass the created NLP engine and supported_languages to the AnalyzerEngine
|
| 59 |
-
analyzer = AnalyzerEngine(
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
)
|
| 64 |
|
| 65 |
# below is useful when the model-to-presidio entity mappings are the same.
|
| 66 |
# Load spaCy model with transformers
|
|
@@ -106,20 +106,21 @@ doc = nlp(input_text)
|
|
| 106 |
# # style="dep" indicates dependencies should be generated.
|
| 107 |
# dep_svg = displacy.render(doc, style="dep", jupyter=False)
|
| 108 |
# st.image(dep_svg, width=400, use_column_width='never')
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
#
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
#
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
st.
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
|
|
|
| 124 |
|
| 125 |
|
|
|
|
| 20 |
supported_entities = ["CREDIT_CARD","DATE_TIME","EMAIL_ADDRESS","IBAN_CODE","IP_ADDRESS","NRP","LOCATION","PERSON","PHONE_NUMBER","URL","US_BANK_NUMBER","US_DRIVER_LICENSE","US_PASSPORT","US_SSN","US_ITIN"]
|
| 21 |
|
| 22 |
# using presidio default recognizer rules
|
| 23 |
+
analyzer = AnalyzerEngine()
|
| 24 |
|
| 25 |
+
# #uncomment below to add spacy predefined engines instead of default engine
|
| 26 |
+
# config = {
|
| 27 |
+
# 'nlp_engine_name': 'spacy',
|
| 28 |
+
# 'models': [
|
| 29 |
+
# {
|
| 30 |
+
# 'lang_code': 'en',
|
| 31 |
+
# 'model_name': 'en_core_web_sm'
|
| 32 |
+
# },
|
| 33 |
|
| 34 |
+
# ],
|
| 35 |
+
# 'ner_model_configuration': {
|
| 36 |
+
# 'labels_to_ignore': ['O'],
|
| 37 |
+
# 'model_to_presidio_entity_mapping': {
|
| 38 |
+
# 'PER': 'PERSON',
|
| 39 |
+
# 'LOC': 'LOCATION',
|
| 40 |
+
# 'DATE': 'DATE_TIME',
|
| 41 |
+
# 'GPE': 'LOCATION',
|
| 42 |
+
# 'PERSON': 'PERSON',
|
| 43 |
+
# 'TIME': 'DATE_TIME',
|
| 44 |
+
# },
|
| 45 |
+
# # 'low_confidence_score_multiplier': 0.4,
|
| 46 |
+
# # 'low_score_entity_names': ['ID', 'ORG']
|
| 47 |
+
# }
|
| 48 |
+
# }
|
| 49 |
+
|
| 50 |
+
# # Initialize the NLP engine with the recognizer registry
|
| 51 |
+
# provider = NlpEngineProvider(nlp_configuration=config)
|
| 52 |
+
# nlp_engine = provider.create_engine()
|
| 53 |
|
| 54 |
+
# # Create the recognizer registry
|
| 55 |
+
# registry = RecognizerRegistry()
|
| 56 |
+
# registry.load_predefined_recognizers()
|
| 57 |
|
| 58 |
+
# # Pass the created NLP engine and supported_languages to the AnalyzerEngine
|
| 59 |
+
# analyzer = AnalyzerEngine(
|
| 60 |
+
# nlp_engine=nlp_engine,
|
| 61 |
+
# supported_languages = "en",
|
| 62 |
+
# registry=registry
|
| 63 |
+
# )
|
| 64 |
|
| 65 |
# below is useful when the model-to-presidio entity mappings are the same.
|
| 66 |
# Load spaCy model with transformers
|
|
|
|
| 106 |
# # style="dep" indicates dependencies should be generated.
|
| 107 |
# dep_svg = displacy.render(doc, style="dep", jupyter=False)
|
| 108 |
# st.image(dep_svg, width=400, use_column_width='never')
|
| 109 |
+
col1, col2 = st.columns(2)
|
| 110 |
+
|
| 111 |
+
with col1:
|
| 112 |
+
# Add a section header:
|
| 113 |
+
st.header("Entity visualizer")
|
| 114 |
+
# Take the text from the input field and render the entity html.
|
| 115 |
+
# Note that style="ent" indicates entities.
|
| 116 |
+
ent_html = displacy.render(doc, style="ent", jupyter=False)
|
| 117 |
+
# Display the entity visualization in the browser:
|
| 118 |
+
st.markdown(ent_html, unsafe_allow_html=True)
|
| 119 |
+
|
| 120 |
+
with col2:
|
| 121 |
+
# Add a section header:
|
| 122 |
+
st.header("Entity Anonymizer")
|
| 123 |
+
# Display the anonymized output in the browser:
|
| 124 |
+
st.markdown(anonym, unsafe_allow_html=True)
|
| 125 |
|
| 126 |
|