AIEcosystem committed on
Commit
6bc7360
·
verified ·
1 Parent(s): fa070d1

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +13 -61
src/streamlit_app.py CHANGED
@@ -12,8 +12,6 @@ from streamlit_extras.stylable_container import stylable_container
12
  from typing import Optional
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
15
-
16
-
17
  st.markdown(
18
  """
19
  <style>
@@ -59,13 +57,7 @@ st.markdown(
59
  }
60
  </style>
61
  """,
62
- unsafe_allow_html=True
63
- )
64
-
65
-
66
-
67
-
68
-
69
  # --- Page Configuration and UI Elements ---
70
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
71
  st.subheader("EntityFinance", divider="violet")
@@ -85,16 +77,12 @@ Results are presented in easy-to-read tables, visualized in an interactive tree
85
 
86
  For any errors or inquiries, please contact us at info@nlpblogs.com""")
87
 
 
88
  with st.sidebar:
89
  st.write("Use the following code to embed the EntityFinance web app on your website. Feel free to adjust the width and height values to fit your page.")
90
  code = '''
91
- <iframe
92
- src="https://aiecosystem-entityfinance.hf.space"
93
- frameborder="0"
94
- width="850"
95
- height="450"
96
  ></iframe>
97
-
98
  '''
99
  st.code(code, language="html")
100
  st.text("")
@@ -102,16 +90,13 @@ with st.sidebar:
102
  st.divider()
103
  st.subheader("🚀 Ready to build your own AI Web App?", divider="violet")
104
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
105
-
106
  # --- Comet ML Setup ---
107
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
108
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
109
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
110
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
111
-
112
  if not comet_initialized:
113
  st.warning("Comet ML not initialized. Check environment variables.")
114
-
115
  # --- Label Definitions ---
116
  labels = [
117
  "Monetary_value",
@@ -124,16 +109,11 @@ labels = [
124
  "Person",
125
  "Product",
126
  "Service",
127
-
128
  "Organization",
129
  "Location",
130
  "Date",
131
- "Time"
132
- ]
133
-
134
-
135
  # Corrected mapping dictionary
136
-
137
  # Create a mapping dictionary for labels to categories
138
  category_mapping = {
139
  "People & Groups": [ "Person",
@@ -146,13 +126,7 @@ category_mapping = {
146
  "Financial_metric", "Product", "Service"],
147
  "Temporal": ["Date", "Time"],
148
  "Locations": ["Location"],
149
- "Documents & Context": ["Financial_document"]
150
- }
151
-
152
-
153
-
154
-
155
-
156
  # --- Model Loading ---
157
  @st.cache_resource
158
  def load_ner_model():
@@ -163,30 +137,28 @@ def load_ner_model():
163
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
164
  st.stop()
165
  model = load_ner_model()
166
-
167
  # Flatten the mapping to a single dictionary
168
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
169
-
170
  # --- Text Input and Clear Button ---
171
- text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area')
172
-
 
 
173
  def clear_text():
174
  """Clears the text area."""
175
  st.session_state['my_text_area'] = ""
176
-
177
  st.button("Clear text", on_click=clear_text)
178
-
179
-
180
  # --- Results Section ---
181
  if st.button("Results"):
182
  start_time = time.time()
183
  if not text.strip():
184
  st.warning("Please enter some text to extract entities.")
 
 
185
  else:
186
  with st.spinner("Extracting entities...", show_time=True):
187
  entities = model.predict_entities(text, labels)
188
  df = pd.DataFrame(entities)
189
-
190
  if not df.empty:
191
  df['category'] = df['label'].map(reverse_category_mapping)
192
  if comet_initialized:
@@ -197,13 +169,10 @@ if st.button("Results"):
197
  )
198
  experiment.log_parameter("input_text", text)
199
  experiment.log_table("predicted_entities", df)
200
-
201
  st.subheader("Grouped Entities by Category", divider = "violet")
202
-
203
  # Create tabs for each category
204
  category_names = sorted(list(category_mapping.keys()))
205
  category_tabs = st.tabs(category_names)
206
-
207
  for i, category_name in enumerate(category_names):
208
  with category_tabs[i]:
209
  df_category_filtered = df[df['category'] == category_name]
@@ -211,9 +180,6 @@ if st.button("Results"):
211
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
212
  else:
213
  st.info(f"No entities found for the '{category_name}' category.")
214
-
215
-
216
-
217
  with st.expander("See Glossary of tags"):
218
  st.write('''
219
  - **text**: ['entity extracted from your text data']
@@ -223,18 +189,15 @@ if st.button("Results"):
223
  - **end**: ['index of the end of the corresponding entity']
224
  ''')
225
  st.divider()
226
-
227
  # Tree map
228
  st.subheader("Tree map", divider = "violet")
229
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
230
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#E8F5E9', plot_bgcolor='#E8F5E9')
231
  st.plotly_chart(fig_treemap)
232
-
233
  # Pie and Bar charts
234
  grouped_counts = df['category'].value_counts().reset_index()
235
  grouped_counts.columns = ['category', 'count']
236
  col1, col2 = st.columns(2)
237
-
238
  with col1:
239
  st.subheader("Pie chart", divider = "violet")
240
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
@@ -244,10 +207,6 @@ if st.button("Results"):
244
  plot_bgcolor='#E8F5E9'
245
  )
246
  st.plotly_chart(fig_pie)
247
-
248
-
249
-
250
-
251
  with col2:
252
  st.subheader("Bar chart", divider = "violet")
253
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
@@ -256,7 +215,6 @@ if st.button("Results"):
256
  plot_bgcolor='#E8F5E9'
257
  )
258
  st.plotly_chart(fig_bar)
259
-
260
  # Most Frequent Entities
261
  st.subheader("Most Frequent Entities", divider="violet")
262
  word_counts = df['text'].value_counts().reset_index()
@@ -271,10 +229,8 @@ if st.button("Results"):
271
  st.plotly_chart(fig_repeating_bar)
272
  else:
273
  st.warning("No entities were found that occur more than once.")
274
-
275
  # Download Section
276
  st.divider()
277
-
278
  dfa = pd.DataFrame(
279
  data={
280
  'Column Name': ['text', 'label', 'score', 'start', 'end'],
@@ -284,7 +240,6 @@ if st.button("Results"):
284
  'accuracy score; how accurately a tag has been assigned to a given entity',
285
  'index of the start of the corresponding entity',
286
  'index of the end of the corresponding entity',
287
-
288
  ]
289
  }
290
  )
@@ -292,7 +247,6 @@ if st.button("Results"):
292
  with zipfile.ZipFile(buf, "w") as myzip:
293
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
294
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
295
-
296
  with stylable_container(
297
  key="download_button",
298
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
@@ -303,15 +257,13 @@ if st.button("Results"):
303
  file_name="nlpblogs_results.zip",
304
  mime="application/zip",
305
  )
306
-
307
  if comet_initialized:
308
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
309
  experiment.end()
310
  else: # If df is empty
311
  st.warning("No entities were found in the provided text.")
312
-
313
- end_time = time.time()
314
  elapsed_time = end_time - start_time
315
  st.text("")
316
  st.text("")
317
- st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")
 
12
  from typing import Optional
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
 
 
15
  st.markdown(
16
  """
17
  <style>
 
57
  }
58
  </style>
59
  """,
60
+ unsafe_allow_html=True)
 
 
 
 
 
 
61
  # --- Page Configuration and UI Elements ---
62
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
63
  st.subheader("EntityFinance", divider="violet")
 
77
 
78
  For any errors or inquiries, please contact us at info@nlpblogs.com""")
79
 
80
+
81
  with st.sidebar:
82
  st.write("Use the following code to embed the EntityFinance web app on your website. Feel free to adjust the width and height values to fit your page.")
83
  code = '''
84
+ <iframe src="https://aiecosystem-entityfinance.hf.space" frameborder="0" width="850" height="450"
 
 
 
 
85
  ></iframe>
 
86
  '''
87
  st.code(code, language="html")
88
  st.text("")
 
90
  st.divider()
91
  st.subheader("🚀 Ready to build your own AI Web App?", divider="violet")
92
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
 
93
  # --- Comet ML Setup ---
94
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
95
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
96
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
97
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 
98
  if not comet_initialized:
99
  st.warning("Comet ML not initialized. Check environment variables.")
 
100
  # --- Label Definitions ---
101
  labels = [
102
  "Monetary_value",
 
109
  "Person",
110
  "Product",
111
  "Service",
 
112
  "Organization",
113
  "Location",
114
  "Date",
115
+ "Time"]
 
 
 
116
  # Corrected mapping dictionary
 
117
  # Create a mapping dictionary for labels to categories
118
  category_mapping = {
119
  "People & Groups": [ "Person",
 
126
  "Financial_metric", "Product", "Service"],
127
  "Temporal": ["Date", "Time"],
128
  "Locations": ["Location"],
129
+ "Documents & Context": ["Financial_document"]}
 
 
 
 
 
 
130
  # --- Model Loading ---
131
  @st.cache_resource
132
  def load_ner_model():
 
137
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
138
  st.stop()
139
  model = load_ner_model()
 
140
  # Flatten the mapping to a single dictionary
141
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
 
142
  # --- Text Input and Clear Button ---
143
+ word_limit = 200
144
+ text = st.text_area(f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter", height=250, key='my_text_area')
145
+ word_count = len(text.split())
146
+ st.markdown(f"**Word count:** {word_count}/{word_limit}")
147
  def clear_text():
148
  """Clears the text area."""
149
  st.session_state['my_text_area'] = ""
 
150
  st.button("Clear text", on_click=clear_text)
 
 
151
  # --- Results Section ---
152
  if st.button("Results"):
153
  start_time = time.time()
154
  if not text.strip():
155
  st.warning("Please enter some text to extract entities.")
156
+ elif word_count > word_limit:
157
+ st.warning(f"Your text exceeds the {word_limit} word limit. Please shorten it to continue.")
158
  else:
159
  with st.spinner("Extracting entities...", show_time=True):
160
  entities = model.predict_entities(text, labels)
161
  df = pd.DataFrame(entities)
 
162
  if not df.empty:
163
  df['category'] = df['label'].map(reverse_category_mapping)
164
  if comet_initialized:
 
169
  )
170
  experiment.log_parameter("input_text", text)
171
  experiment.log_table("predicted_entities", df)
 
172
  st.subheader("Grouped Entities by Category", divider = "violet")
 
173
  # Create tabs for each category
174
  category_names = sorted(list(category_mapping.keys()))
175
  category_tabs = st.tabs(category_names)
 
176
  for i, category_name in enumerate(category_names):
177
  with category_tabs[i]:
178
  df_category_filtered = df[df['category'] == category_name]
 
180
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
181
  else:
182
  st.info(f"No entities found for the '{category_name}' category.")
 
 
 
183
  with st.expander("See Glossary of tags"):
184
  st.write('''
185
  - **text**: ['entity extracted from your text data']
 
189
  - **end**: ['index of the end of the corresponding entity']
190
  ''')
191
  st.divider()
 
192
  # Tree map
193
  st.subheader("Tree map", divider = "violet")
194
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
195
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#E8F5E9', plot_bgcolor='#E8F5E9')
196
  st.plotly_chart(fig_treemap)
 
197
  # Pie and Bar charts
198
  grouped_counts = df['category'].value_counts().reset_index()
199
  grouped_counts.columns = ['category', 'count']
200
  col1, col2 = st.columns(2)
 
201
  with col1:
202
  st.subheader("Pie chart", divider = "violet")
203
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
 
207
  plot_bgcolor='#E8F5E9'
208
  )
209
  st.plotly_chart(fig_pie)
 
 
 
 
210
  with col2:
211
  st.subheader("Bar chart", divider = "violet")
212
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
 
215
  plot_bgcolor='#E8F5E9'
216
  )
217
  st.plotly_chart(fig_bar)
 
218
  # Most Frequent Entities
219
  st.subheader("Most Frequent Entities", divider="violet")
220
  word_counts = df['text'].value_counts().reset_index()
 
229
  st.plotly_chart(fig_repeating_bar)
230
  else:
231
  st.warning("No entities were found that occur more than once.")
 
232
  # Download Section
233
  st.divider()
 
234
  dfa = pd.DataFrame(
235
  data={
236
  'Column Name': ['text', 'label', 'score', 'start', 'end'],
 
240
  'accuracy score; how accurately a tag has been assigned to a given entity',
241
  'index of the start of the corresponding entity',
242
  'index of the end of the corresponding entity',
 
243
  ]
244
  }
245
  )
 
247
  with zipfile.ZipFile(buf, "w") as myzip:
248
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
249
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
 
250
  with stylable_container(
251
  key="download_button",
252
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
 
257
  file_name="nlpblogs_results.zip",
258
  mime="application/zip",
259
  )
 
260
  if comet_initialized:
261
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
262
  experiment.end()
263
  else: # If df is empty
264
  st.warning("No entities were found in the provided text.")
265
+ end_time = time.time()
 
266
  elapsed_time = end_time - start_time
267
  st.text("")
268
  st.text("")
269
+ st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")