AIEcosystem committed on
Commit
c615f7e
·
verified ·
1 Parent(s): 92e76ee

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +23 -82
src/streamlit_app.py CHANGED
@@ -12,8 +12,6 @@ from streamlit_extras.stylable_container import stylable_container
12
  from typing import Optional
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
15
-
16
-
17
  st.markdown(
18
  """
19
  <style>
@@ -22,55 +20,42 @@ st.markdown(
22
  background-color: #E8F5E9; /* A very light green */
23
  color: #1B5E20; /* Dark green for the text */
24
  }
25
-
26
- /* Sidebar background color */
27
  .css-1d36184 {
28
  background-color: #A5D6A7; /* A medium light green */
29
  secondary-background-color: #A5D6A7;
30
  }
31
-
32
- /* Expander background color and header */
33
  .streamlit-expanderContent, .streamlit-expanderHeader {
34
  background-color: #E8F5E9;
35
  }
36
-
37
- /* Text Area background and text color */
38
  .stTextArea textarea {
39
  background-color: #81C784; /* A slightly darker medium green */
40
  color: #1B5E20; /* Dark green for text */
41
  }
42
-
43
- /* Button background and text color */
44
  .stButton > button {
45
  background-color: #81C784;
46
  color: #1B5E20;
47
  }
48
-
49
- /* Warning box background and text color */
50
  .stAlert.st-warning {
51
  background-color: #66BB6A; /* A medium-dark green for the warning box */
52
  color: #1B5E20;
53
  }
54
-
55
- /* Success box background and text color */
56
  .stAlert.st-success {
57
  background-color: #66BB6A; /* A medium-dark green for the success box */
58
  color: #1B5E20;
59
  }
60
  </style>
61
  """,
62
- unsafe_allow_html=True
63
- )
64
-
65
-
66
-
67
-
68
-
69
  # --- Page Configuration and UI Elements ---
70
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
71
  st.subheader("PiiGuard", divider="violet")
72
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
73
-
74
  expander = st.expander("**Important notes**")
75
  expander.write("""**Named Entities:** This PiiGuard web app predicts fifty-one (51) labels: "person", "organization", "social_media_handle", "username", "insurance_company", "phone_number", "email", "email_address", "mobile_phone_number", "landline_phone_number", "fax_number", "credit_card_number", "credit_card_expiration_date", "credit_card_brand", "cvv", "cvc", "bank_account_number", "iban", "transaction_number", "cpf", "cnpj", "passport_number", "passport_expiration_date", "driver's_license_number", "tax_identification_number", "identity_card_number", "national_id_number", "identity_document_number", "birth_certificate_number", "social_security_number", "health_insurance_id_number", "health_insurance_number", "national_health_insurance_number", "student_id_number", "registration_number", "insurance_number", "serial_number", "visa_number", "reservation_number", "train_ticket_number", "medication", "medical_condition", "blood_type", "date_of_birth", "address", "ip_address", "postal_code", "flight_number", "license_plate_number", "vehicle_registration_number", "digital_signature"
76
 
@@ -78,25 +63,17 @@ Results are presented in easy-to-read tables, visualized in an interactive tree
78
 
79
  **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.
80
 
81
- **Usage Limits:** You can request results unlimited times for one (1) month.
82
 
83
  **Supported Languages:** English
84
 
85
  **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
86
 
87
  For any errors or inquiries, please contact us at info@nlpblogs.com""")
88
-
89
  with st.sidebar:
90
  st.write("Use the following code to embed the PiiGuard web app on your website. Feel free to adjust the width and height values to fit your page.")
91
  code = '''
92
- <iframe
93
- src="https://aiecosystem-piiguard.hf.space"
94
- frameborder="0"
95
- width="850"
96
- height="450"
97
- ></iframe>
98
-
99
-
100
  '''
101
  st.code(code, language="html")
102
  st.text("")
@@ -104,20 +81,15 @@ with st.sidebar:
104
  st.divider()
105
  st.subheader("🚀 Ready to build your own AI Web App?", divider="violet")
106
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
107
-
108
  # --- Comet ML Setup ---
109
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
110
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
111
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
112
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
113
-
114
  if not comet_initialized:
115
  st.warning("Comet ML not initialized. Check environment variables.")
116
-
117
  # --- Label Definitions ---
118
- labels = [
119
-
120
- "person",
121
  "organization",
122
  "social_media_handle",
123
  "username",
@@ -160,13 +132,9 @@ labels = [
160
  "ip_address",
161
  "postal_code", "flight_number",
162
  "license_plate_number",
163
- "vehicle_registration_number", "digital_signature"
164
- ]
165
-
166
-
167
  # Corrected mapping dictionary
168
- category_mapping = {
169
- "People_and_Groups": [
170
  "person",
171
  "organization",
172
  "social_media_handle",
@@ -232,15 +200,9 @@ category_mapping = {
232
  ],
233
  "Digital_and_Security": [
234
  "digital_signature"
235
- ]
236
- }
237
-
238
-
239
-
240
-
241
  # --- Model Loading ---
242
- @st.cache_resource
243
- def load_ner_model():
244
  """Loads the GLiNER model and caches it."""
245
  try:
246
  return GLiNER.from_pretrained("knowledgator/gliner-multitask-v1.0", nested_ner=True, num_gen_sequences=2, gen_constraints= labels)
@@ -248,30 +210,28 @@ def load_ner_model():
248
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
249
  st.stop()
250
  model = load_ner_model()
251
-
252
  # Flatten the mapping to a single dictionary
253
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
254
-
255
  # --- Text Input and Clear Button ---
256
- text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area')
257
-
 
 
258
  def clear_text():
259
  """Clears the text area."""
260
  st.session_state['my_text_area'] = ""
261
-
262
  st.button("Clear text", on_click=clear_text)
263
-
264
-
265
  # --- Results Section ---
266
  if st.button("Results"):
267
  start_time = time.time()
268
  if not text.strip():
269
  st.warning("Please enter some text to extract entities.")
 
 
270
  else:
271
  with st.spinner("Extracting entities...", show_time=True):
272
  entities = model.predict_entities(text, labels)
273
  df = pd.DataFrame(entities)
274
-
275
  if not df.empty:
276
  df['category'] = df['label'].map(reverse_category_mapping)
277
  if comet_initialized:
@@ -282,13 +242,10 @@ if st.button("Results"):
282
  )
283
  experiment.log_parameter("input_text", text)
284
  experiment.log_table("predicted_entities", df)
285
-
286
  st.subheader("Grouped Entities by Category", divider = "violet")
287
-
288
  # Create tabs for each category
289
  category_names = sorted(list(category_mapping.keys()))
290
  category_tabs = st.tabs(category_names)
291
-
292
  for i, category_name in enumerate(category_names):
293
  with category_tabs[i]:
294
  df_category_filtered = df[df['category'] == category_name]
@@ -296,9 +253,6 @@ if st.button("Results"):
296
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
297
  else:
298
  st.info(f"No entities found for the '{category_name}' category.")
299
-
300
-
301
-
302
  with st.expander("See Glossary of tags"):
303
  st.write('''
304
  - **text**: ['entity extracted from your text data']
@@ -308,18 +262,15 @@ if st.button("Results"):
308
  - **end**: ['index of the end of the corresponding entity']
309
  ''')
310
  st.divider()
311
-
312
  # Tree map
313
  st.subheader("Tree map", divider = "violet")
314
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
315
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#E8F5E9', plot_bgcolor='#E8F5E9')
316
  st.plotly_chart(fig_treemap)
317
-
318
  # Pie and Bar charts
319
  grouped_counts = df['category'].value_counts().reset_index()
320
  grouped_counts.columns = ['category', 'count']
321
  col1, col2 = st.columns(2)
322
-
323
  with col1:
324
  st.subheader("Pie chart", divider = "violet")
325
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
@@ -329,10 +280,6 @@ if st.button("Results"):
329
  plot_bgcolor='#E8F5E9'
330
  )
331
  st.plotly_chart(fig_pie)
332
-
333
-
334
-
335
-
336
  with col2:
337
  st.subheader("Bar chart", divider = "violet")
338
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
@@ -341,7 +288,6 @@ if st.button("Results"):
341
  plot_bgcolor='#E8F5E9'
342
  )
343
  st.plotly_chart(fig_bar)
344
-
345
  # Most Frequent Entities
346
  st.subheader("Most Frequent Entities", divider="gray")
347
  word_counts = df['text'].value_counts().reset_index()
@@ -356,10 +302,8 @@ if st.button("Results"):
356
  st.plotly_chart(fig_repeating_bar)
357
  else:
358
  st.warning("No entities were found that occur more than once.")
359
-
360
  # Download Section
361
  st.divider()
362
-
363
  dfa = pd.DataFrame(
364
  data={
365
  'Column Name': ['text', 'label', 'score', 'start', 'end'],
@@ -369,7 +313,6 @@ if st.button("Results"):
369
  'accuracy score; how accurately a tag has been assigned to a given entity',
370
  'index of the start of the corresponding entity',
371
  'index of the end of the corresponding entity',
372
-
373
  ]
374
  }
375
  )
@@ -377,7 +320,6 @@ if st.button("Results"):
377
  with zipfile.ZipFile(buf, "w") as myzip:
378
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
379
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
380
-
381
  with stylable_container(
382
  key="download_button",
383
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
@@ -388,15 +330,14 @@ if st.button("Results"):
388
  file_name="nlpblogs_results.zip",
389
  mime="application/zip",
390
  )
391
-
392
  if comet_initialized:
393
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
394
  experiment.end()
395
  else: # If df is empty
396
  st.warning("No entities were found in the provided text.")
397
-
398
- end_time = time.time()
399
  elapsed_time = end_time - start_time
400
  st.text("")
401
  st.text("")
402
- st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")
 
 
12
  from typing import Optional
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
 
 
15
  st.markdown(
16
  """
17
  <style>
 
20
  background-color: #E8F5E9; /* A very light green */
21
  color: #1B5E20; /* Dark green for the text */
22
  }
23
+ /* Sidebar background color */
 
24
  .css-1d36184 {
25
  background-color: #A5D6A7; /* A medium light green */
26
  secondary-background-color: #A5D6A7;
27
  }
28
+ /* Expander background color and header */
 
29
  .streamlit-expanderContent, .streamlit-expanderHeader {
30
  background-color: #E8F5E9;
31
  }
32
+ /* Text Area background and text color */
 
33
  .stTextArea textarea {
34
  background-color: #81C784; /* A slightly darker medium green */
35
  color: #1B5E20; /* Dark green for text */
36
  }
37
+ /* Button background and text color */
 
38
  .stButton > button {
39
  background-color: #81C784;
40
  color: #1B5E20;
41
  }
42
+ /* Warning box background and text color */
 
43
  .stAlert.st-warning {
44
  background-color: #66BB6A; /* A medium-dark green for the warning box */
45
  color: #1B5E20;
46
  }
47
+ /* Success box background and text color */
 
48
  .stAlert.st-success {
49
  background-color: #66BB6A; /* A medium-dark green for the success box */
50
  color: #1B5E20;
51
  }
52
  </style>
53
  """,
54
+ unsafe_allow_html=True)
 
 
 
 
 
 
55
  # --- Page Configuration and UI Elements ---
56
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
57
  st.subheader("PiiGuard", divider="violet")
58
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
 
59
  expander = st.expander("**Important notes**")
60
  expander.write("""**Named Entities:** This PiiGuard web app predicts fifty-one (51) labels: "person", "organization", "social_media_handle", "username", "insurance_company", "phone_number", "email", "email_address", "mobile_phone_number", "landline_phone_number", "fax_number", "credit_card_number", "credit_card_expiration_date", "credit_card_brand", "cvv", "cvc", "bank_account_number", "iban", "transaction_number", "cpf", "cnpj", "passport_number", "passport_expiration_date", "driver's_license_number", "tax_identification_number", "identity_card_number", "national_id_number", "identity_document_number", "birth_certificate_number", "social_security_number", "health_insurance_id_number", "health_insurance_number", "national_health_insurance_number", "student_id_number", "registration_number", "insurance_number", "serial_number", "visa_number", "reservation_number", "train_ticket_number", "medication", "medical_condition", "blood_type", "date_of_birth", "address", "ip_address", "postal_code", "flight_number", "license_plate_number", "vehicle_registration_number", "digital_signature"
61
 
 
63
 
64
  **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.
65
 
66
+ **Usage Limits:** You can request results unlimited times for one (1) month.
67
 
68
  **Supported Languages:** English
69
 
70
  **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
71
 
72
  For any errors or inquiries, please contact us at info@nlpblogs.com""")
 
73
  with st.sidebar:
74
  st.write("Use the following code to embed the PiiGuard web app on your website. Feel free to adjust the width and height values to fit your page.")
75
  code = '''
76
+ <iframe src="https://aiecosystem-piiguard.hf.space" frameborder="0" width="850" height="450" ></iframe>
 
 
 
 
 
 
 
77
  '''
78
  st.code(code, language="html")
79
  st.text("")
 
81
  st.divider()
82
  st.subheader("🚀 Ready to build your own AI Web App?", divider="violet")
83
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
 
84
  # --- Comet ML Setup ---
85
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
86
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
87
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
88
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 
89
  if not comet_initialized:
90
  st.warning("Comet ML not initialized. Check environment variables.")
 
91
  # --- Label Definitions ---
92
+ labels = [ "person",
 
 
93
  "organization",
94
  "social_media_handle",
95
  "username",
 
132
  "ip_address",
133
  "postal_code", "flight_number",
134
  "license_plate_number",
135
+ "vehicle_registration_number", "digital_signature"]
 
 
 
136
  # Corrected mapping dictionary
137
+ category_mapping = { "People_and_Groups": [
 
138
  "person",
139
  "organization",
140
  "social_media_handle",
 
200
  ],
201
  "Digital_and_Security": [
202
  "digital_signature"
203
+ ]}
 
 
 
 
 
204
  # --- Model Loading ---
205
+ @st.cache_resourcedef load_ner_model():
 
206
  """Loads the GLiNER model and caches it."""
207
  try:
208
  return GLiNER.from_pretrained("knowledgator/gliner-multitask-v1.0", nested_ner=True, num_gen_sequences=2, gen_constraints= labels)
 
210
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
211
  st.stop()
212
  model = load_ner_model()
 
213
  # Flatten the mapping to a single dictionary
214
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
 
215
  # --- Text Input and Clear Button ---
216
+ word_limit = 200
217
+ text = st.text_area(f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter", height=250, key='my_text_area')
218
+ word_count = len(text.split())
219
+ st.markdown(f"**Word count:** {word_count}/{word_limit}")
220
  def clear_text():
221
  """Clears the text area."""
222
  st.session_state['my_text_area'] = ""
 
223
  st.button("Clear text", on_click=clear_text)
 
 
224
  # --- Results Section ---
225
  if st.button("Results"):
226
  start_time = time.time()
227
  if not text.strip():
228
  st.warning("Please enter some text to extract entities.")
229
+ elif word_count > word_limit:
230
+ st.warning(f"Your text exceeds the {word_limit} word limit. Please shorten it to continue.")
231
  else:
232
  with st.spinner("Extracting entities...", show_time=True):
233
  entities = model.predict_entities(text, labels)
234
  df = pd.DataFrame(entities)
 
235
  if not df.empty:
236
  df['category'] = df['label'].map(reverse_category_mapping)
237
  if comet_initialized:
 
242
  )
243
  experiment.log_parameter("input_text", text)
244
  experiment.log_table("predicted_entities", df)
 
245
  st.subheader("Grouped Entities by Category", divider = "violet")
 
246
  # Create tabs for each category
247
  category_names = sorted(list(category_mapping.keys()))
248
  category_tabs = st.tabs(category_names)
 
249
  for i, category_name in enumerate(category_names):
250
  with category_tabs[i]:
251
  df_category_filtered = df[df['category'] == category_name]
 
253
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
254
  else:
255
  st.info(f"No entities found for the '{category_name}' category.")
 
 
 
256
  with st.expander("See Glossary of tags"):
257
  st.write('''
258
  - **text**: ['entity extracted from your text data']
 
262
  - **end**: ['index of the end of the corresponding entity']
263
  ''')
264
  st.divider()
 
265
  # Tree map
266
  st.subheader("Tree map", divider = "violet")
267
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
268
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#E8F5E9', plot_bgcolor='#E8F5E9')
269
  st.plotly_chart(fig_treemap)
 
270
  # Pie and Bar charts
271
  grouped_counts = df['category'].value_counts().reset_index()
272
  grouped_counts.columns = ['category', 'count']
273
  col1, col2 = st.columns(2)
 
274
  with col1:
275
  st.subheader("Pie chart", divider = "violet")
276
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
 
280
  plot_bgcolor='#E8F5E9'
281
  )
282
  st.plotly_chart(fig_pie)
 
 
 
 
283
  with col2:
284
  st.subheader("Bar chart", divider = "violet")
285
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
 
288
  plot_bgcolor='#E8F5E9'
289
  )
290
  st.plotly_chart(fig_bar)
 
291
  # Most Frequent Entities
292
  st.subheader("Most Frequent Entities", divider="gray")
293
  word_counts = df['text'].value_counts().reset_index()
 
302
  st.plotly_chart(fig_repeating_bar)
303
  else:
304
  st.warning("No entities were found that occur more than once.")
 
305
  # Download Section
306
  st.divider()
 
307
  dfa = pd.DataFrame(
308
  data={
309
  'Column Name': ['text', 'label', 'score', 'start', 'end'],
 
313
  'accuracy score; how accurately a tag has been assigned to a given entity',
314
  'index of the start of the corresponding entity',
315
  'index of the end of the corresponding entity',
 
316
  ]
317
  }
318
  )
 
320
  with zipfile.ZipFile(buf, "w") as myzip:
321
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
322
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
 
323
  with stylable_container(
324
  key="download_button",
325
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
 
330
  file_name="nlpblogs_results.zip",
331
  mime="application/zip",
332
  )
 
333
  if comet_initialized:
334
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
335
  experiment.end()
336
  else: # If df is empty
337
  st.warning("No entities were found in the provided text.")
338
+ end_time = time.time()
 
339
  elapsed_time = end_time - start_time
340
  st.text("")
341
  st.text("")
342
+ st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")
343
+