AIEcosystem committed on
Commit
1543442
·
verified ·
1 Parent(s): 01ef7e8

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +11 -61
src/streamlit_app.py CHANGED
@@ -12,9 +12,6 @@ from streamlit_extras.stylable_container import stylable_container
12
  from typing import Optional
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
15
-
16
-
17
-
18
  st.markdown(
19
  """
20
  <style>
@@ -23,41 +20,34 @@ st.markdown(
23
  background-color: #E0FFFF; /* Light cyan, a very pale blue */
24
  color: #000000; /* Black for the text */
25
  }
26
-
27
  /* Sidebar background color */
28
  .css-1d36184 {
29
  background-color: #ADD8E6; /* Light blue for the sidebar */
30
  secondary-background-color: #ADD8E6;
31
  }
32
-
33
- /* Expander background color */
34
  .streamlit-expanderContent {
35
  background-color: #E0FFFF;
36
  }
37
-
38
- /* Expander header background color */
39
  .streamlit-expanderHeader {
40
  background-color: #E0FFFF;
41
  }
42
-
43
  /* Text Area background and text color */
44
  .stTextArea textarea {
45
  background-color: #B0E0E6; /* Powder blue, a light, soft blue */
46
  color: #000000; /* Black for text */
47
  }
48
-
49
  /* Button background and text color */
50
  .stButton > button {
51
  background-color: #B0E0E6;
52
  color: #000000;
53
  }
54
-
55
  /* Warning box background and text color */
56
  .stAlert.st-warning {
57
  background-color: #87CEEB; /* Sky blue for the warning box */
58
  color: #000000;
59
  }
60
-
61
  /* Success box background and text color */
62
  .stAlert.st-success {
63
  background-color: #87CEEB; /* Sky blue for the success box */
@@ -65,13 +55,7 @@ st.markdown(
65
  }
66
  </style>
67
  """,
68
- unsafe_allow_html=True
69
- )
70
-
71
-
72
-
73
-
74
-
75
  # --- Page Configuration and UI Elements ---
76
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
77
  st.subheader("StoryCraft", divider="blue")
@@ -94,13 +78,7 @@ For any errors or inquiries, please contact us at info@nlpblogs.com""")
94
  with st.sidebar:
95
  st.write("Use the following code to embed the StoryCraft web app on your website. Feel free to adjust the width and height values to fit your page.")
96
  code = '''
97
- <iframe
98
- src="https://aiecosystem-storycraft.hf.space"
99
- frameborder="0"
100
- width="850"
101
- height="450"
102
- ></iframe>
103
-
104
  '''
105
  st.code(code, language="html")
106
  st.text("")
@@ -108,28 +86,22 @@ with st.sidebar:
108
  st.divider()
109
  st.subheader("🚀 Ready to build your own AI Web App?", divider="blue")
110
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
111
-
112
  # --- Comet ML Setup ---
113
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
114
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
115
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
116
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
117
-
118
  if not comet_initialized:
119
  st.warning("Comet ML not initialized. Check environment variables.")
120
-
121
  # --- Label Definitions ---
122
  labels = ["Person","Organization","Location","Date","Time","Quantity","Product","Event","Title","Job_title","Artwork","Media","URL","Website","Hashtag","Email","IP_address","File_path"]
123
-
124
  # Corrected mapping dictionary
125
-
126
  # Create a mapping dictionary from each category to the labels it contains
127
  category_mapping = {
128
  "Core Foundational Entities": ["Person", "Organization", "Location", "Date", "Time", "Quantity"],
129
  "Content Enrichment Entities": ["Product", "Event", "Title", "Job_title", "Artwork", "Media"],
130
  "Digital & Technical Entities": ["URL", "Website", "Hashtag", "Email", "IP_address", "File_path"],
131
  }
132
-
133
  # --- Model Loading ---
134
  @st.cache_resource
135
  def load_ner_model():
@@ -140,30 +112,28 @@ def load_ner_model():
140
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
141
  st.stop()
142
  model = load_ner_model()
143
-
144
  # Flatten the mapping to a single dictionary
145
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
146
-
147
  # --- Text Input and Clear Button ---
148
- text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area')
149
-
 
 
150
  def clear_text():
151
  """Clears the text area."""
152
  st.session_state['my_text_area'] = ""
153
-
154
  st.button("Clear text", on_click=clear_text)
155
-
156
-
157
  # --- Results Section ---
158
  if st.button("Results"):
159
  start_time = time.time()
160
  if not text.strip():
161
  st.warning("Please enter some text to extract entities.")
 
 
162
  else:
163
  with st.spinner("Extracting entities...", show_time=True):
164
  entities = model.predict_entities(text, labels)
165
  df = pd.DataFrame(entities)
166
-
167
  if not df.empty:
168
  df['category'] = df['label'].map(reverse_category_mapping)
169
  if comet_initialized:
@@ -174,13 +144,10 @@ if st.button("Results"):
174
  )
175
  experiment.log_parameter("input_text", text)
176
  experiment.log_table("predicted_entities", df)
177
-
178
  st.subheader("Grouped Entities by Category", divider = "blue")
179
-
180
  # Create tabs for each category
181
  category_names = sorted(list(category_mapping.keys()))
182
  category_tabs = st.tabs(category_names)
183
-
184
  for i, category_name in enumerate(category_names):
185
  with category_tabs[i]:
186
  df_category_filtered = df[df['category'] == category_name]
@@ -188,9 +155,6 @@ if st.button("Results"):
188
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
189
  else:
190
  st.info(f"No entities found for the '{category_name}' category.")
191
-
192
-
193
-
194
  with st.expander("See Glossary of tags"):
195
  st.write('''
196
  - **text**: ['entity extracted from your text data']
@@ -200,18 +164,15 @@ if st.button("Results"):
200
  - **end**: ['index of the end of the corresponding entity']
201
  ''')
202
  st.divider()
203
-
204
  # Tree map
205
  st.subheader("Tree map", divider = "blue")
206
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
207
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#E0FFFF', plot_bgcolor='#E0FFFF')
208
  st.plotly_chart(fig_treemap)
209
-
210
  # Pie and Bar charts
211
  grouped_counts = df['category'].value_counts().reset_index()
212
  grouped_counts.columns = ['category', 'count']
213
  col1, col2 = st.columns(2)
214
-
215
  with col1:
216
  st.subheader("Pie chart", divider = "blue")
217
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
@@ -221,10 +182,6 @@ if st.button("Results"):
221
  plot_bgcolor='#E0FFFF'
222
  )
223
  st.plotly_chart(fig_pie)
224
-
225
-
226
-
227
-
228
  with col2:
229
  st.subheader("Bar chart", divider = "blue")
230
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
@@ -233,7 +190,6 @@ if st.button("Results"):
233
  plot_bgcolor='#E0FFFF'
234
  )
235
  st.plotly_chart(fig_bar)
236
-
237
  # Most Frequent Entities
238
  st.subheader("Most Frequent Entities", divider="blue")
239
  word_counts = df['text'].value_counts().reset_index()
@@ -248,10 +204,8 @@ if st.button("Results"):
248
  st.plotly_chart(fig_repeating_bar)
249
  else:
250
  st.warning("No entities were found that occur more than once.")
251
-
252
  # Download Section
253
  st.divider()
254
-
255
  dfa = pd.DataFrame(
256
  data={
257
  'Column Name': ['text', 'label', 'score', 'start', 'end'],
@@ -261,7 +215,6 @@ if st.button("Results"):
261
  'accuracy score; how accurately a tag has been assigned to a given entity',
262
  'index of the start of the corresponding entity',
263
  'index of the end of the corresponding entity',
264
-
265
  ]
266
  }
267
  )
@@ -269,7 +222,6 @@ if st.button("Results"):
269
  with zipfile.ZipFile(buf, "w") as myzip:
270
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
271
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
272
-
273
  with stylable_container(
274
  key="download_button",
275
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
@@ -280,14 +232,12 @@ if st.button("Results"):
280
  file_name="nlpblogs_results.zip",
281
  mime="application/zip",
282
  )
283
-
284
  if comet_initialized:
285
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
286
  experiment.end()
287
  else: # If df is empty
288
  st.warning("No entities were found in the provided text.")
289
-
290
- end_time = time.time()
291
  elapsed_time = end_time - start_time
292
  st.text("")
293
  st.text("")
 
12
  from typing import Optional
13
  from gliner import GLiNER
14
  from comet_ml import Experiment
 
 
 
15
  st.markdown(
16
  """
17
  <style>
 
20
  background-color: #E0FFFF; /* Light cyan, a very pale blue */
21
  color: #000000; /* Black for the text */
22
  }
 
23
  /* Sidebar background color */
24
  .css-1d36184 {
25
  background-color: #ADD8E6; /* Light blue for the sidebar */
26
  secondary-background-color: #ADD8E6;
27
  }
28
+ /* Expander background color */
 
29
  .streamlit-expanderContent {
30
  background-color: #E0FFFF;
31
  }
32
+ /* Expander header background color */
 
33
  .streamlit-expanderHeader {
34
  background-color: #E0FFFF;
35
  }
 
36
  /* Text Area background and text color */
37
  .stTextArea textarea {
38
  background-color: #B0E0E6; /* Powder blue, a light, soft blue */
39
  color: #000000; /* Black for text */
40
  }
 
41
  /* Button background and text color */
42
  .stButton > button {
43
  background-color: #B0E0E6;
44
  color: #000000;
45
  }
 
46
  /* Warning box background and text color */
47
  .stAlert.st-warning {
48
  background-color: #87CEEB; /* Sky blue for the warning box */
49
  color: #000000;
50
  }
 
51
  /* Success box background and text color */
52
  .stAlert.st-success {
53
  background-color: #87CEEB; /* Sky blue for the success box */
 
55
  }
56
  </style>
57
  """,
58
+ unsafe_allow_html=True)
 
 
 
 
 
 
59
  # --- Page Configuration and UI Elements ---
60
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
61
  st.subheader("StoryCraft", divider="blue")
 
78
  with st.sidebar:
79
  st.write("Use the following code to embed the StoryCraft web app on your website. Feel free to adjust the width and height values to fit your page.")
80
  code = '''
81
+ <iframe src="https://aiecosystem-storycraft.hf.space" frameborder="0" width="850" height="450" ></iframe>
 
 
 
 
 
 
82
  '''
83
  st.code(code, language="html")
84
  st.text("")
 
86
  st.divider()
87
  st.subheader("🚀 Ready to build your own AI Web App?", divider="blue")
88
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
 
89
  # --- Comet ML Setup ---
90
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
91
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
92
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
93
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 
94
  if not comet_initialized:
95
  st.warning("Comet ML not initialized. Check environment variables.")
 
96
  # --- Label Definitions ---
97
  labels = ["Person","Organization","Location","Date","Time","Quantity","Product","Event","Title","Job_title","Artwork","Media","URL","Website","Hashtag","Email","IP_address","File_path"]
 
98
  # Corrected mapping dictionary
 
99
  # Create a mapping dictionary from each category to the labels it contains
100
  category_mapping = {
101
  "Core Foundational Entities": ["Person", "Organization", "Location", "Date", "Time", "Quantity"],
102
  "Content Enrichment Entities": ["Product", "Event", "Title", "Job_title", "Artwork", "Media"],
103
  "Digital & Technical Entities": ["URL", "Website", "Hashtag", "Email", "IP_address", "File_path"],
104
  }
 
105
  # --- Model Loading ---
106
  @st.cache_resource
107
  def load_ner_model():
 
112
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
113
  st.stop()
114
  model = load_ner_model()
 
115
  # Invert the category mapping into a flat label -> category dictionary
116
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
 
117
  # --- Text Input and Clear Button ---
118
+ word_limit = 200
119
+ text = st.text_area(f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter", height=250, key='my_text_area')
120
+ word_count = len(text.split())
121
+ st.markdown(f"**Word count:** {word_count}/{word_limit}")
122
  def clear_text():
123
  """Clears the text area."""
124
  st.session_state['my_text_area'] = ""
 
125
  st.button("Clear text", on_click=clear_text)
 
 
126
  # --- Results Section ---
127
  if st.button("Results"):
128
  start_time = time.time()
129
  if not text.strip():
130
  st.warning("Please enter some text to extract entities.")
131
+ elif word_count > word_limit:
132
+ st.warning(f"Your text exceeds the {word_limit} word limit. Please shorten it to continue.")
133
  else:
134
  with st.spinner("Extracting entities...", show_time=True):
135
  entities = model.predict_entities(text, labels)
136
  df = pd.DataFrame(entities)
 
137
  if not df.empty:
138
  df['category'] = df['label'].map(reverse_category_mapping)
139
  if comet_initialized:
 
144
  )
145
  experiment.log_parameter("input_text", text)
146
  experiment.log_table("predicted_entities", df)
 
147
  st.subheader("Grouped Entities by Category", divider = "blue")
 
148
  # Create tabs for each category
149
  category_names = sorted(list(category_mapping.keys()))
150
  category_tabs = st.tabs(category_names)
 
151
  for i, category_name in enumerate(category_names):
152
  with category_tabs[i]:
153
  df_category_filtered = df[df['category'] == category_name]
 
155
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
156
  else:
157
  st.info(f"No entities found for the '{category_name}' category.")
 
 
 
158
  with st.expander("See Glossary of tags"):
159
  st.write('''
160
  - **text**: ['entity extracted from your text data']
 
164
  - **end**: ['index of the end of the corresponding entity']
165
  ''')
166
  st.divider()
 
167
  # Tree map
168
  st.subheader("Tree map", divider = "blue")
169
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
170
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#E0FFFF', plot_bgcolor='#E0FFFF')
171
  st.plotly_chart(fig_treemap)
 
172
  # Pie and Bar charts
173
  grouped_counts = df['category'].value_counts().reset_index()
174
  grouped_counts.columns = ['category', 'count']
175
  col1, col2 = st.columns(2)
 
176
  with col1:
177
  st.subheader("Pie chart", divider = "blue")
178
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
 
182
  plot_bgcolor='#E0FFFF'
183
  )
184
  st.plotly_chart(fig_pie)
 
 
 
 
185
  with col2:
186
  st.subheader("Bar chart", divider = "blue")
187
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
 
190
  plot_bgcolor='#E0FFFF'
191
  )
192
  st.plotly_chart(fig_bar)
 
193
  # Most Frequent Entities
194
  st.subheader("Most Frequent Entities", divider="blue")
195
  word_counts = df['text'].value_counts().reset_index()
 
204
  st.plotly_chart(fig_repeating_bar)
205
  else:
206
  st.warning("No entities were found that occur more than once.")
 
207
  # Download Section
208
  st.divider()
 
209
  dfa = pd.DataFrame(
210
  data={
211
  'Column Name': ['text', 'label', 'score', 'start', 'end'],
 
215
  'accuracy score; how accurately a tag has been assigned to a given entity',
216
  'index of the start of the corresponding entity',
217
  'index of the end of the corresponding entity',
 
218
  ]
219
  }
220
  )
 
222
  with zipfile.ZipFile(buf, "w") as myzip:
223
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
224
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
 
225
  with stylable_container(
226
  key="download_button",
227
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
 
232
  file_name="nlpblogs_results.zip",
233
  mime="application/zip",
234
  )
 
235
  if comet_initialized:
236
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
237
  experiment.end()
238
  else: # If df is empty
239
  st.warning("No entities were found in the provided text.")
240
+ end_time = time.time()
 
241
  elapsed_time = end_time - start_time
242
  st.text("")
243
  st.text("")