AIEcosystem committed on
Commit
ed441b9
·
verified ·
1 Parent(s): d14fb24

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +13 -69
src/streamlit_app.py CHANGED
@@ -12,9 +12,6 @@ from streamlit_extras.stylable_container import stylable_container
12
  from typing import Optional
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
15
-
16
-
17
-
18
  st.markdown(
19
  """
20
  <style>
@@ -23,36 +20,30 @@ st.markdown(
23
  background-color: #F3E5F5; /* A very light purple */
24
  color: #1A0A26; /* Dark purple for the text */
25
  }
26
-
27
  /* Sidebar background color */
28
  .css-1d36184 {
29
  background-color: #D1C4E9; /* A medium light purple */
30
  secondary-background-color: #D1C4E9;
31
  }
32
-
33
  /* Expander background color and header */
34
  .streamlit-expanderContent, .streamlit-expanderHeader {
35
  background-color: #F3E5F5;
36
  }
37
-
38
  /* Text Area background and text color */
39
  .stTextArea textarea {
40
  background-color: #B39DDB; /* A slightly darker medium purple */
41
  color: #1A0A26; /* Dark purple for text */
42
  }
43
-
44
  /* Button background and text color */
45
  .stButton > button {
46
  background-color: #B39DDB;
47
  color: #1A0A26;
48
  }
49
-
50
  /* Warning box background and text color */
51
  .stAlert.st-warning {
52
  background-color: #9575CD; /* A medium-dark purple for the warning box */
53
  color: #1A0A26;
54
  }
55
-
56
  /* Success box background and text color */
57
  .stAlert.st-success {
58
  background-color: #9575CD; /* A medium-dark purple for the success box */
@@ -60,19 +51,13 @@ st.markdown(
60
  }
61
  </style>
62
  """,
63
- unsafe_allow_html=True
64
- )
65
-
66
-
67
-
68
-
69
  # --- Page Configuration and UI Elements ---
70
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
71
  st.subheader("MediaTagger", divider="violet")
72
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
73
  expander = st.expander("**Important notes**")
74
- expander.write("""**Named Entities:** This MediaTagger web app predicts eighteen (18) labels: 'person', 'organization', 'location', 'date', 'time', 'event', 'title', 'product', 'law', 'policy', 'work of art', 'geopolitical entity', 'number', 'cause of death',
75
- 'weapon', 'vehicle', 'facility', 'temporal expression'
76
 
77
  Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
78
 
@@ -89,14 +74,8 @@ For any errors or inquiries, please contact us at info@nlpblogs.com""")
89
  with st.sidebar:
90
  st.write("Use the following code to embed the MediaTagger web app on your website. Feel free to adjust the width and height values to fit your page.")
91
  code = '''
92
- <iframe
93
- src="https://aiecosystem-mediatagger.hf.space"
94
- frameborder="0"
95
- width="850"
96
- height="450"
97
  ></iframe>
98
-
99
-
100
  '''
101
  st.code(code, language="html")
102
  st.text("")
@@ -104,16 +83,13 @@ with st.sidebar:
104
  st.divider()
105
  st.subheader("🚀 Ready to build your own AI Web App?", divider="violet")
106
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
107
-
108
  # --- Comet ML Setup ---
109
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
110
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
111
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
112
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
113
-
114
  if not comet_initialized:
115
  st.warning("Comet ML not initialized. Check environment variables.")
116
-
117
  # --- Label Definitions ---
118
  labels = [
119
  'person',
@@ -135,26 +111,16 @@ labels = [
135
  'facility',
136
  'temporal expression',
137
  ]
138
-
139
-
140
  # Corrected mapping dictionary
141
-
142
  # Create a mapping dictionary for labels to categories
143
  category_mapping = {
144
  "People & Groups": ["person", "organization", "title"],
145
  "Topics & Objects": ["event", "product", "law", "policy", "work of art", "weapon", "vehicle"],
146
  "Temporal": ["date", "time", "temporal expression"],
147
  "Locations": ["location", "geopolitical entity", "facility"],
148
- "Quantitative & Contextual": ["number", "cause of death"]
149
- }
150
-
151
-
152
-
153
-
154
-
155
  # --- Model Loading ---
156
- @st.cache_resource
157
- def load_ner_model():
158
  """Loads the GLiNER model and caches it."""
159
  try:
160
  return GLiNER.from_pretrained("EmergentMethods/gliner_large_news-v2.1", nested_ner=True, num_gen_sequences=2, gen_constraints= labels)
@@ -162,30 +128,28 @@ def load_ner_model():
162
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
163
  st.stop()
164
  model = load_ner_model()
165
-
166
  # Flatten the mapping to a single dictionary
167
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
168
-
169
  # --- Text Input and Clear Button ---
170
- text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area')
171
-
 
 
172
  def clear_text():
173
  """Clears the text area."""
174
  st.session_state['my_text_area'] = ""
175
-
176
  st.button("Clear text", on_click=clear_text)
177
-
178
-
179
  # --- Results Section ---
180
  if st.button("Results"):
181
  start_time = time.time()
182
  if not text.strip():
183
  st.warning("Please enter some text to extract entities.")
 
 
184
  else:
185
  with st.spinner("Extracting entities...", show_time=True):
186
  entities = model.predict_entities(text, labels)
187
  df = pd.DataFrame(entities)
188
-
189
  if not df.empty:
190
  df['category'] = df['label'].map(reverse_category_mapping)
191
  if comet_initialized:
@@ -196,13 +160,10 @@ if st.button("Results"):
196
  )
197
  experiment.log_parameter("input_text", text)
198
  experiment.log_table("predicted_entities", df)
199
-
200
  st.subheader("Grouped Entities by Category", divider = "violet")
201
-
202
  # Create tabs for each category
203
  category_names = sorted(list(category_mapping.keys()))
204
  category_tabs = st.tabs(category_names)
205
-
206
  for i, category_name in enumerate(category_names):
207
  with category_tabs[i]:
208
  df_category_filtered = df[df['category'] == category_name]
@@ -210,9 +171,6 @@ if st.button("Results"):
210
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
211
  else:
212
  st.info(f"No entities found for the '{category_name}' category.")
213
-
214
-
215
-
216
  with st.expander("See Glossary of tags"):
217
  st.write('''
218
  - **text**: ['entity extracted from your text data']
@@ -222,18 +180,15 @@ if st.button("Results"):
222
  - **end**: ['index of the end of the corresponding entity']
223
  ''')
224
  st.divider()
225
-
226
  # Tree map
227
  st.subheader("Tree map", divider = "violet")
228
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
229
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F3E5F5', plot_bgcolor='#F3E5F5')
230
  st.plotly_chart(fig_treemap)
231
-
232
  # Pie and Bar charts
233
  grouped_counts = df['category'].value_counts().reset_index()
234
  grouped_counts.columns = ['category', 'count']
235
  col1, col2 = st.columns(2)
236
-
237
  with col1:
238
  st.subheader("Pie chart", divider = "violet")
239
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
@@ -243,10 +198,6 @@ if st.button("Results"):
243
  plot_bgcolor='#F3E5F5'
244
  )
245
  st.plotly_chart(fig_pie)
246
-
247
-
248
-
249
-
250
  with col2:
251
  st.subheader("Bar chart", divider = "violet")
252
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
@@ -255,7 +206,6 @@ if st.button("Results"):
255
  plot_bgcolor='#F3E5F5'
256
  )
257
  st.plotly_chart(fig_bar)
258
-
259
  # Most Frequent Entities
260
  st.subheader("Most Frequent Entities", divider="violet")
261
  word_counts = df['text'].value_counts().reset_index()
@@ -270,10 +220,8 @@ if st.button("Results"):
270
  st.plotly_chart(fig_repeating_bar)
271
  else:
272
  st.warning("No entities were found that occur more than once.")
273
-
274
  # Download Section
275
  st.divider()
276
-
277
  dfa = pd.DataFrame(
278
  data={
279
  'Column Name': ['text', 'label', 'score', 'start', 'end'],
@@ -283,7 +231,6 @@ if st.button("Results"):
283
  'accuracy score; how accurately a tag has been assigned to a given entity',
284
  'index of the start of the corresponding entity',
285
  'index of the end of the corresponding entity',
286
-
287
  ]
288
  }
289
  )
@@ -291,7 +238,6 @@ if st.button("Results"):
291
  with zipfile.ZipFile(buf, "w") as myzip:
292
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
293
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
294
-
295
  with stylable_container(
296
  key="download_button",
297
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
@@ -302,15 +248,13 @@ if st.button("Results"):
302
  file_name="nlpblogs_results.zip",
303
  mime="application/zip",
304
  )
305
-
306
  if comet_initialized:
307
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
308
  experiment.end()
309
  else: # If df is empty
310
  st.warning("No entities were found in the provided text.")
311
-
312
- end_time = time.time()
313
  elapsed_time = end_time - start_time
314
  st.text("")
315
  st.text("")
316
- st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")
 
12
  from typing import Optional
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
 
 
 
15
  st.markdown(
16
  """
17
  <style>
 
20
  background-color: #F3E5F5; /* A very light purple */
21
  color: #1A0A26; /* Dark purple for the text */
22
  }
 
23
  /* Sidebar background color */
24
  .css-1d36184 {
25
  background-color: #D1C4E9; /* A medium light purple */
26
  secondary-background-color: #D1C4E9;
27
  }
 
28
  /* Expander background color and header */
29
  .streamlit-expanderContent, .streamlit-expanderHeader {
30
  background-color: #F3E5F5;
31
  }
 
32
  /* Text Area background and text color */
33
  .stTextArea textarea {
34
  background-color: #B39DDB; /* A slightly darker medium purple */
35
  color: #1A0A26; /* Dark purple for text */
36
  }
 
37
  /* Button background and text color */
38
  .stButton > button {
39
  background-color: #B39DDB;
40
  color: #1A0A26;
41
  }
 
42
  /* Warning box background and text color */
43
  .stAlert.st-warning {
44
  background-color: #9575CD; /* A medium-dark purple for the warning box */
45
  color: #1A0A26;
46
  }
 
47
  /* Success box background and text color */
48
  .stAlert.st-success {
49
  background-color: #9575CD; /* A medium-dark purple for the success box */
 
51
  }
52
  </style>
53
  """,
54
+ unsafe_allow_html=True)
 
 
 
 
 
55
  # --- Page Configuration and UI Elements ---
56
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
57
  st.subheader("MediaTagger", divider="violet")
58
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
59
  expander = st.expander("**Important notes**")
60
+ expander.write("""**Named Entities:** This MediaTagger web app predicts eighteen (18) labels: 'person', 'organization', 'location', 'date', 'time', 'event', 'title', 'product', 'law', 'policy', 'work of art', 'geopolitical entity', 'number', 'cause of death','weapon', 'vehicle', 'facility', 'temporal expression'
 
61
 
62
  Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
63
 
 
74
  with st.sidebar:
75
  st.write("Use the following code to embed the MediaTagger web app on your website. Feel free to adjust the width and height values to fit your page.")
76
  code = '''
77
+ <iframe src="https://aiecosystem-mediatagger.hf.space" frameborder="0" width="850" height="450"
 
 
 
 
78
  ></iframe>
 
 
79
  '''
80
  st.code(code, language="html")
81
  st.text("")
 
83
  st.divider()
84
  st.subheader("🚀 Ready to build your own AI Web App?", divider="violet")
85
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
 
86
  # --- Comet ML Setup ---
87
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
88
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
89
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
90
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 
91
  if not comet_initialized:
92
  st.warning("Comet ML not initialized. Check environment variables.")
 
93
  # --- Label Definitions ---
94
  labels = [
95
  'person',
 
111
  'facility',
112
  'temporal expression',
113
  ]
 
 
114
  # Corrected mapping dictionary
 
115
  # Create a mapping dictionary for labels to categories
116
  category_mapping = {
117
  "People & Groups": ["person", "organization", "title"],
118
  "Topics & Objects": ["event", "product", "law", "policy", "work of art", "weapon", "vehicle"],
119
  "Temporal": ["date", "time", "temporal expression"],
120
  "Locations": ["location", "geopolitical entity", "facility"],
121
+ "Quantitative & Contextual": ["number", "cause of death"]}
 
 
 
 
 
 
122
  # --- Model Loading ---
123
+ @st.cache_resource
+ def load_ner_model():
 
124
  """Loads the GLiNER model and caches it."""
125
  try:
126
  return GLiNER.from_pretrained("EmergentMethods/gliner_large_news-v2.1", nested_ner=True, num_gen_sequences=2, gen_constraints= labels)
 
128
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
129
  st.stop()
130
  model = load_ner_model()
 
131
  # Flatten the mapping to a single dictionary
132
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
 
133
  # --- Text Input and Clear Button ---
134
+ word_limit = 200
135
+ text = st.text_area(f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter", height=250, key='my_text_area')
136
+ word_count = len(text.split())
137
+ st.markdown(f"**Word count:** {word_count}/{word_limit}")
138
  def clear_text():
139
  """Clears the text area."""
140
  st.session_state['my_text_area'] = ""
 
141
  st.button("Clear text", on_click=clear_text)
 
 
142
  # --- Results Section ---
143
  if st.button("Results"):
144
  start_time = time.time()
145
  if not text.strip():
146
  st.warning("Please enter some text to extract entities.")
147
+ elif word_count > word_limit:
148
+ st.warning(f"Your text exceeds the {word_limit} word limit. Please shorten it to continue.")
149
  else:
150
  with st.spinner("Extracting entities...", show_time=True):
151
  entities = model.predict_entities(text, labels)
152
  df = pd.DataFrame(entities)
 
153
  if not df.empty:
154
  df['category'] = df['label'].map(reverse_category_mapping)
155
  if comet_initialized:
 
160
  )
161
  experiment.log_parameter("input_text", text)
162
  experiment.log_table("predicted_entities", df)
 
163
  st.subheader("Grouped Entities by Category", divider = "violet")
 
164
  # Create tabs for each category
165
  category_names = sorted(list(category_mapping.keys()))
166
  category_tabs = st.tabs(category_names)
 
167
  for i, category_name in enumerate(category_names):
168
  with category_tabs[i]:
169
  df_category_filtered = df[df['category'] == category_name]
 
171
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
172
  else:
173
  st.info(f"No entities found for the '{category_name}' category.")
 
 
 
174
  with st.expander("See Glossary of tags"):
175
  st.write('''
176
  - **text**: ['entity extracted from your text data']
 
180
  - **end**: ['index of the end of the corresponding entity']
181
  ''')
182
  st.divider()
 
183
  # Tree map
184
  st.subheader("Tree map", divider = "violet")
185
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
186
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F3E5F5', plot_bgcolor='#F3E5F5')
187
  st.plotly_chart(fig_treemap)
 
188
  # Pie and Bar charts
189
  grouped_counts = df['category'].value_counts().reset_index()
190
  grouped_counts.columns = ['category', 'count']
191
  col1, col2 = st.columns(2)
 
192
  with col1:
193
  st.subheader("Pie chart", divider = "violet")
194
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
 
198
  plot_bgcolor='#F3E5F5'
199
  )
200
  st.plotly_chart(fig_pie)
 
 
 
 
201
  with col2:
202
  st.subheader("Bar chart", divider = "violet")
203
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
 
206
  plot_bgcolor='#F3E5F5'
207
  )
208
  st.plotly_chart(fig_bar)
 
209
  # Most Frequent Entities
210
  st.subheader("Most Frequent Entities", divider="violet")
211
  word_counts = df['text'].value_counts().reset_index()
 
220
  st.plotly_chart(fig_repeating_bar)
221
  else:
222
  st.warning("No entities were found that occur more than once.")
 
223
  # Download Section
224
  st.divider()
 
225
  dfa = pd.DataFrame(
226
  data={
227
  'Column Name': ['text', 'label', 'score', 'start', 'end'],
 
231
  'accuracy score; how accurately a tag has been assigned to a given entity',
232
  'index of the start of the corresponding entity',
233
  'index of the end of the corresponding entity',
 
234
  ]
235
  }
236
  )
 
238
  with zipfile.ZipFile(buf, "w") as myzip:
239
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
240
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
 
241
  with stylable_container(
242
  key="download_button",
243
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
 
248
  file_name="nlpblogs_results.zip",
249
  mime="application/zip",
250
  )
 
251
  if comet_initialized:
252
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
253
  experiment.end()
254
  else: # If df is empty
255
  st.warning("No entities were found in the provided text.")
256
+ end_time = time.time()
 
257
  elapsed_time = end_time - start_time
258
  st.text("")
259
  st.text("")
260
+ st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")