AIEcosystem committed on
Commit
22fe17d
·
verified ·
1 Parent(s): df2df61

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +17 -60
src/streamlit_app.py CHANGED
@@ -12,8 +12,6 @@ from streamlit_extras.stylable_container import stylable_container
12
  from typing import Optional
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
15
-
16
-
17
  st.markdown(
18
  """
19
  <style>
@@ -59,18 +57,11 @@ st.markdown(
59
  }
60
  </style>
61
  """,
62
- unsafe_allow_html=True
63
- )
64
-
65
-
66
-
67
-
68
-
69
  # --- Page Configuration and UI Elements ---
70
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
71
  st.subheader("ChainSense", divider="violet")
72
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
73
-
74
  expander = st.expander("**Important notes**")
75
  expander.write("""**Named Entities:** This ChainSense web app predicts eight (8) labels:"Location", "Organization", "Product_or_Good", "Date", "Quantity", "Transportation_Mode", "Person", "Document_or_Form_ID"
76
 
@@ -86,17 +77,13 @@ Results are presented in easy-to-read tables, visualized in an interactive tree
86
 
87
  For any errors or inquiries, please contact us at info@nlpblogs.com""")
88
 
 
 
89
  with st.sidebar:
90
  st.write("Use the following code to embed the ChainSense web app on your website. Feel free to adjust the width and height values to fit your page.")
91
  code = '''
92
- <iframe
93
- src="https://aiecosystem-chainsense.hf.space"
94
- frameborder="0"
95
- width="850"
96
- height="450"
97
  ></iframe>
98
-
99
-
100
  '''
101
  st.code(code, language="html")
102
  st.text("")
@@ -104,16 +91,13 @@ with st.sidebar:
104
  st.divider()
105
  st.subheader("🚀 Ready to build your own AI Web App?", divider="violet")
106
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
107
-
108
# --- Comet ML Setup ---
# Experiment tracking is opt-in: it activates only when every credential
# below is supplied through the environment.
COMET_API_KEY = os.environ.get("COMET_API_KEY")
COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")

# True only when all three settings are present and non-empty.
comet_initialized = all((COMET_API_KEY, COMET_WORKSPACE, COMET_PROJECT_NAME))
if not comet_initialized:
    st.warning("Comet ML not initialized. Check environment variables.")
116
-
117
  # --- Label Definitions ---
118
  labels = [
119
  "Location",
@@ -123,23 +107,14 @@ labels = [
123
  "Quantity",
124
  "Transportation_Mode",
125
  "Person",
126
- "Document_or_Form_ID"
127
- ]
128
-
129
-
130
  # Corrected mapping dictionary
131
-
132
# Create a mapping dictionary for labels to categories.
# Each predicted NER label belongs to exactly one display category; these
# categories drive the tab and chart groupings further down the page.
category_mapping = {
    "People & Groups": ["Person", "Organization"],
    "Goods & Transactions": ["Product_or_Good", "Quantity", "Document_or_Form_ID"],
    "Temporal & Events": ["Date", "Transportation_Mode"],
    "Locations": ["Location"],
}
139
-
140
-
141
-
142
-
143
  # --- Model Loading ---
144
  @st.cache_resource
145
  def load_ner_model():
@@ -150,30 +125,32 @@ def load_ner_model():
150
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
151
  st.stop()
152
  model = load_ner_model()
153
-
154
# Flatten the mapping to a single dictionary (label -> category) so each
# predicted entity's label can be categorised with one dict lookup.
reverse_category_mapping = {
    label: category
    for category, label_list in category_mapping.items()
    for label in label_list
}
156
-
157
  # --- Text Input and Clear Button ---
158
- text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area')
159
-
 
 
 
 
 
160
def clear_text():
    """Callback for the "Clear text" button: blank out the text area."""
    # The text_area widget is keyed 'my_text_area'; overwriting its
    # session-state entry resets the visible contents on the next rerun.
    st.session_state['my_text_area'] = ""
163
-
164
  st.button("Clear text", on_click=clear_text)
165
-
166
-
167
  # --- Results Section ---
168
  if st.button("Results"):
169
  start_time = time.time()
 
170
  if not text.strip():
171
  st.warning("Please enter some text to extract entities.")
 
 
172
  else:
173
  with st.spinner("Extracting entities...", show_time=True):
174
  entities = model.predict_entities(text, labels)
175
  df = pd.DataFrame(entities)
176
-
177
  if not df.empty:
178
  df['category'] = df['label'].map(reverse_category_mapping)
179
  if comet_initialized:
@@ -184,13 +161,10 @@ if st.button("Results"):
184
  )
185
  experiment.log_parameter("input_text", text)
186
  experiment.log_table("predicted_entities", df)
187
-
188
  st.subheader("Grouped Entities by Category", divider = "violet")
189
-
190
  # Create tabs for each category
191
  category_names = sorted(list(category_mapping.keys()))
192
  category_tabs = st.tabs(category_names)
193
-
194
  for i, category_name in enumerate(category_names):
195
  with category_tabs[i]:
196
  df_category_filtered = df[df['category'] == category_name]
@@ -198,9 +172,6 @@ if st.button("Results"):
198
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
199
  else:
200
  st.info(f"No entities found for the '{category_name}' category.")
201
-
202
-
203
-
204
  with st.expander("See Glossary of tags"):
205
  st.write('''
206
  - **text**: ['entity extracted from your text data']
@@ -210,18 +181,15 @@ if st.button("Results"):
210
  - **end**: ['index of the end of the corresponding entity']
211
  ''')
212
  st.divider()
213
-
214
  # Tree map
215
  st.subheader("Tree map", divider = "violet")
216
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
217
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#E8F5E9', plot_bgcolor='#E8F5E9')
218
  st.plotly_chart(fig_treemap)
219
-
220
  # Pie and Bar charts
221
  grouped_counts = df['category'].value_counts().reset_index()
222
  grouped_counts.columns = ['category', 'count']
223
  col1, col2 = st.columns(2)
224
-
225
  with col1:
226
  st.subheader("Pie chart", divider = "violet")
227
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
@@ -231,10 +199,6 @@ if st.button("Results"):
231
  plot_bgcolor='#E8F5E9'
232
  )
233
  st.plotly_chart(fig_pie)
234
-
235
-
236
-
237
-
238
  with col2:
239
  st.subheader("Bar chart", divider = "violet")
240
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
@@ -243,7 +207,6 @@ if st.button("Results"):
243
  plot_bgcolor='#E8F5E9'
244
  )
245
  st.plotly_chart(fig_bar)
246
-
247
  # Most Frequent Entities
248
  st.subheader("Most Frequent Entities", divider="violet")
249
  word_counts = df['text'].value_counts().reset_index()
@@ -258,10 +221,8 @@ if st.button("Results"):
258
  st.plotly_chart(fig_repeating_bar)
259
  else:
260
  st.warning("No entities were found that occur more than once.")
261
-
262
  # Download Section
263
  st.divider()
264
-
265
  dfa = pd.DataFrame(
266
  data={
267
  'Column Name': ['text', 'label', 'score', 'start', 'end'],
@@ -271,7 +232,6 @@ if st.button("Results"):
271
  'accuracy score; how accurately a tag has been assigned to a given entity',
272
  'index of the start of the corresponding entity',
273
  'index of the end of the corresponding entity',
274
-
275
  ]
276
  }
277
  )
@@ -279,7 +239,6 @@ if st.button("Results"):
279
  with zipfile.ZipFile(buf, "w") as myzip:
280
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
281
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
282
-
283
  with stylable_container(
284
  key="download_button",
285
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
@@ -290,14 +249,12 @@ if st.button("Results"):
290
  file_name="nlpblogs_results.zip",
291
  mime="application/zip",
292
  )
293
-
294
  if comet_initialized:
295
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
296
  experiment.end()
297
  else: # If df is empty
298
  st.warning("No entities were found in the provided text.")
299
-
300
- end_time = time.time()
301
  elapsed_time = end_time - start_time
302
  st.text("")
303
  st.text("")
 
12
  from typing import Optional
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
 
 
15
  st.markdown(
16
  """
17
  <style>
 
57
  }
58
  </style>
59
  """,
60
+ unsafe_allow_html=True)
 
 
 
 
 
 
61
  # --- Page Configuration and UI Elements ---
62
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
63
  st.subheader("ChainSense", divider="violet")
64
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
 
65
  expander = st.expander("**Important notes**")
66
  expander.write("""**Named Entities:** This ChainSense web app predicts eight (8) labels:"Location", "Organization", "Product_or_Good", "Date", "Quantity", "Transportation_Mode", "Person", "Document_or_Form_ID"
67
 
 
77
 
78
  For any errors or inquiries, please contact us at info@nlpblogs.com""")
79
 
80
+
81
+
82
  with st.sidebar:
83
  st.write("Use the following code to embed the ChainSense web app on your website. Feel free to adjust the width and height values to fit your page.")
84
  code = '''
85
+ <iframe src="https://aiecosystem-chainsense.hf.space" frameborder="0" width="850" height="450"
 
 
 
 
86
  ></iframe>
 
 
87
  '''
88
  st.code(code, language="html")
89
  st.text("")
 
91
  st.divider()
92
  st.subheader("🚀 Ready to build your own AI Web App?", divider="violet")
93
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
 
94
# --- Comet ML Setup ---
# Pull the three Comet credentials from the environment in one pass.
_comet_vars = (
    os.environ.get("COMET_API_KEY"),
    os.environ.get("COMET_WORKSPACE"),
    os.environ.get("COMET_PROJECT_NAME"),
)
COMET_API_KEY, COMET_WORKSPACE, COMET_PROJECT_NAME = _comet_vars

# Experiment logging is enabled only when every credential is set.
comet_initialized = all(_comet_vars)
if not comet_initialized:
    st.warning("Comet ML not initialized. Check environment variables.")
 
101
  # --- Label Definitions ---
102
  labels = [
103
  "Location",
 
107
  "Quantity",
108
  "Transportation_Mode",
109
  "Person",
110
+ "Document_or_Form_ID"]
 
 
 
111
  # Corrected mapping dictionary
 
112
# Create a mapping dictionary for labels to categories.
# The pairs below assign every predicted NER label to the display category
# used for the grouped tabs, tree map, and pie/bar charts.
category_mapping = dict([
    ("People & Groups", ["Person", "Organization"]),
    ("Goods & Transactions", ["Product_or_Good", "Quantity", "Document_or_Form_ID"]),
    ("Temporal & Events", ["Date", "Transportation_Mode"]),
    ("Locations", ["Location"]),
])
 
 
 
 
 
118
  # --- Model Loading ---
119
  @st.cache_resource
120
  def load_ner_model():
 
125
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
126
  st.stop()
127
  model = load_ner_model()
 
128
# Flatten the mapping to a single dictionary.
# Invert category -> [labels] into label -> category for O(1) lookups when
# tagging each predicted entity with its display category.
reverse_category_mapping = {}
for _category, _label_list in category_mapping.items():
    for _lbl in _label_list:
        reverse_category_mapping[_lbl] = _category
 
130
# --- Text Input and Clear Button ---
# Cap accepted input at 200 words; the Results handler further down
# refuses to run the model when the text exceeds this limit.
word_limit = 200

_label = f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter"
text = st.text_area(_label, height=250, key='my_text_area')

# Live word counter shown beneath the widget so users can self-correct
# before pressing Results.
word_count = len(text.split())
st.markdown(f"**Word count:** {word_count}/{word_limit}")
138
def clear_text():
    """Reset the input text area to an empty string."""
    # Writing "" to the widget's session-state key ('my_text_area')
    # clears what the user typed on the next Streamlit rerun.
    st.session_state['my_text_area'] = ""
 
141
  st.button("Clear text", on_click=clear_text)
 
 
142
  # --- Results Section ---
143
  if st.button("Results"):
144
  start_time = time.time()
145
+ # Check for word limit and empty text first
146
  if not text.strip():
147
  st.warning("Please enter some text to extract entities.")
148
+ elif word_count > word_limit:
149
+ st.warning(f"Your text exceeds the {word_limit} word limit. Please shorten it to continue.")
150
  else:
151
  with st.spinner("Extracting entities...", show_time=True):
152
  entities = model.predict_entities(text, labels)
153
  df = pd.DataFrame(entities)
 
154
  if not df.empty:
155
  df['category'] = df['label'].map(reverse_category_mapping)
156
  if comet_initialized:
 
161
  )
162
  experiment.log_parameter("input_text", text)
163
  experiment.log_table("predicted_entities", df)
 
164
  st.subheader("Grouped Entities by Category", divider = "violet")
 
165
  # Create tabs for each category
166
  category_names = sorted(list(category_mapping.keys()))
167
  category_tabs = st.tabs(category_names)
 
168
  for i, category_name in enumerate(category_names):
169
  with category_tabs[i]:
170
  df_category_filtered = df[df['category'] == category_name]
 
172
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
173
  else:
174
  st.info(f"No entities found for the '{category_name}' category.")
 
 
 
175
  with st.expander("See Glossary of tags"):
176
  st.write('''
177
  - **text**: ['entity extracted from your text data']
 
181
  - **end**: ['index of the end of the corresponding entity']
182
  ''')
183
  st.divider()
 
184
  # Tree map
185
  st.subheader("Tree map", divider = "violet")
186
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
187
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#E8F5E9', plot_bgcolor='#E8F5E9')
188
  st.plotly_chart(fig_treemap)
 
189
  # Pie and Bar charts
190
  grouped_counts = df['category'].value_counts().reset_index()
191
  grouped_counts.columns = ['category', 'count']
192
  col1, col2 = st.columns(2)
 
193
  with col1:
194
  st.subheader("Pie chart", divider = "violet")
195
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
 
199
  plot_bgcolor='#E8F5E9'
200
  )
201
  st.plotly_chart(fig_pie)
 
 
 
 
202
  with col2:
203
  st.subheader("Bar chart", divider = "violet")
204
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
 
207
  plot_bgcolor='#E8F5E9'
208
  )
209
  st.plotly_chart(fig_bar)
 
210
  # Most Frequent Entities
211
  st.subheader("Most Frequent Entities", divider="violet")
212
  word_counts = df['text'].value_counts().reset_index()
 
221
  st.plotly_chart(fig_repeating_bar)
222
  else:
223
  st.warning("No entities were found that occur more than once.")
 
224
  # Download Section
225
  st.divider()
 
226
  dfa = pd.DataFrame(
227
  data={
228
  'Column Name': ['text', 'label', 'score', 'start', 'end'],
 
232
  'accuracy score; how accurately a tag has been assigned to a given entity',
233
  'index of the start of the corresponding entity',
234
  'index of the end of the corresponding entity',
 
235
  ]
236
  }
237
  )
 
239
  with zipfile.ZipFile(buf, "w") as myzip:
240
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
241
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
 
242
  with stylable_container(
243
  key="download_button",
244
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
 
249
  file_name="nlpblogs_results.zip",
250
  mime="application/zip",
251
  )
 
252
  if comet_initialized:
253
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
254
  experiment.end()
255
  else: # If df is empty
256
  st.warning("No entities were found in the provided text.")
257
+ end_time = time.time()
 
258
  elapsed_time = end_time - start_time
259
  st.text("")
260
  st.text("")