AIEcosystem committed on
Commit
5283a70
·
verified ·
1 Parent(s): 90cd943

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +60 -40
src/streamlit_app.py CHANGED
@@ -13,13 +13,57 @@ from streamlit_extras.stylable_container import stylable_container
13
  from typing import Optional
14
  from gliner import GLiNER
15
  from comet_ml import Experiment
16
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  # --- Page Configuration and UI Elements ---
18
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
19
  st.subheader("DataHarvest", divider="violet")
20
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
21
  st.markdown(':rainbow[**Supported Languages: English**]')
22
-
23
  expander = st.expander("**Important notes**")
24
  expander.write("""**Named Entities:** This DataHarvest web app predicts nine (9) labels: "person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"
25
 
@@ -29,18 +73,15 @@ Results are presented in easy-to-read tables, visualized in an interactive tree
29
 
30
  **Usage Limits:** You can request results unlimited times for one (1) month.
31
 
32
- **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL. For any errors or inquiries, please contact us at info@nlpblogs.com""")
 
 
33
 
34
  with st.sidebar:
35
  st.write("Use the following code to embed the DataHarvest web app on your website. Feel free to adjust the width and height values to fit your page.")
36
  code = '''
37
- <iframe
38
- src="https://aiecosystem-dataharvest.hf.space"
39
- frameborder="0"
40
- width="850"
41
- height="450"
42
  ></iframe>
43
-
44
  '''
45
  st.code(code, language="html")
46
  st.text("")
@@ -48,7 +89,6 @@ with st.sidebar:
48
  st.divider()
49
  st.subheader("🚀 Ready to build your own AI Web App?", divider="violet")
50
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
51
-
52
  # --- Comet ML Setup ---
53
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
54
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
@@ -56,20 +96,15 @@ COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
56
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
57
  if not comet_initialized:
58
  st.warning("Comet ML not initialized. Check environment variables.")
59
-
60
  # --- Label Definitions ---
61
-
62
  labels = ["person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"]
63
-
64
  # Corrected mapping dictionary
65
  # Map each category name to the list of labels it groups
66
  category_mapping = {
67
  "People": ["person", "organization", "position"],
68
  "Locations": ["country", "city"],
69
  "Time": ["date", "time"],
70
- "Numbers": ["money", "cardinal"]
71
- }
72
-
73
  # --- Model Loading ---
74
  @st.cache_resource
75
  def load_ner_model():
@@ -79,31 +114,29 @@ def load_ner_model():
79
  except Exception as e:
80
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
81
  st.stop()
82
-
83
  model = load_ner_model()
84
-
85
  # Invert the mapping into a flat label -> category dictionary
86
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
87
-
88
  # --- Text Input and Clear Button ---
89
- text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area')
90
-
 
 
91
  def clear_text():
92
  """Clears the text area."""
93
  st.session_state['my_text_area'] = ""
94
-
95
  def remove_punctuation(text):
96
  """Removes punctuation from a string."""
97
  translator = str.maketrans('', '', string.punctuation)
98
  return text.translate(translator)
99
-
100
  st.button("Clear text", on_click=clear_text)
101
-
102
  # --- Results Section ---
103
  if st.button("Results"):
104
  start_time = time.time()
105
  if not text.strip():
106
  st.warning("Please enter some text to extract entities.")
 
 
107
  else:
108
  # Remove punctuation from the input text
109
  cleaned_text = remove_punctuation(text)
@@ -121,13 +154,10 @@ if st.button("Results"):
121
  )
122
  experiment.log_parameter("input_text", text)
123
  experiment.log_table("predicted_entities", df)
124
-
125
  st.subheader("Grouped Entities by Category", divider = "violet")
126
-
127
  # Create tabs for each category
128
  category_names = sorted(list(category_mapping.keys()))
129
  category_tabs = st.tabs(category_names)
130
-
131
  for i, category_name in enumerate(category_names):
132
  with category_tabs[i]:
133
  df_category_filtered = df[df['category'] == category_name]
@@ -135,7 +165,6 @@ if st.button("Results"):
135
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
136
  else:
137
  st.info(f"No entities found for the '{category_name}' category.")
138
-
139
  with st.expander("See Glossary of tags"):
140
  st.write('''
141
  - **text**: ['entity extracted from your text data']
@@ -145,33 +174,28 @@ if st.button("Results"):
145
  - **end**: ['index of the end of the corresponding entity']
146
  ''')
147
  st.divider()
148
-
149
  # Tree map
150
  st.subheader("Tree map", divider = "violet")
151
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
152
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
153
  st.plotly_chart(fig_treemap)
154
-
155
  # Pie and Bar charts
156
  grouped_counts = df['category'].value_counts().reset_index()
157
  grouped_counts.columns = ['category', 'count']
158
  col1, col2 = st.columns(2)
159
-
160
  with col1:
161
  st.subheader("Pie chart", divider = "violet")
162
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
163
  fig_pie.update_traces(textposition='inside', textinfo='percent+label')
164
  fig_pie.update_layout(
165
- )
166
  st.plotly_chart(fig_pie)
167
-
168
  with col2:
169
  st.subheader("Bar chart", divider = "violet")
170
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
171
  fig_bar.update_layout( # Changed from fig_pie to fig_bar
172
- )
173
  st.plotly_chart(fig_bar)
174
-
175
  # Most Frequent Entities
176
  st.subheader("Most Frequent Entities", divider="violet")
177
  word_counts = df['text'].value_counts().reset_index()
@@ -185,10 +209,8 @@ if st.button("Results"):
185
  st.plotly_chart(fig_repeating_bar)
186
  else:
187
  st.warning("No entities were found that occur more than once.")
188
-
189
  # Download Section
190
  st.divider()
191
-
192
  dfa = pd.DataFrame(
193
  data={
194
  'Column Name': ['text', 'label', 'score', 'start', 'end'],
@@ -205,7 +227,6 @@ if st.button("Results"):
205
  with zipfile.ZipFile(buf, "w") as myzip:
206
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
207
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
208
-
209
  with stylable_container(
210
  key="download_button",
211
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
@@ -216,7 +237,6 @@ if st.button("Results"):
216
  file_name="nlpblogs_results.zip",
217
  mime="application/zip",
218
  )
219
-
220
  if comet_initialized:
221
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
222
  experiment.end()
@@ -226,4 +246,4 @@ if st.button("Results"):
226
  elapsed_time = end_time - start_time
227
  st.text("")
228
  st.text("")
229
- st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")
 
13
  from typing import Optional
14
  from gliner import GLiNER
15
  from comet_ml import Experiment
16
+ st.markdown(
17
+ """
18
+ <style>
19
+ /* Main app background and text color */
20
+ .stApp {
21
+ background-color: #E8F5E9; /* A very light green */
22
+ color: #1B5E20; /* Dark green for the text */
23
+ }
24
+
25
+ /* Sidebar background color */
26
+ .css-1d36184 {
27
+ background-color: #A5D6A7; /* A medium light green */
28
+ secondary-background-color: #A5D6A7;
29
+ }
30
+
31
+ /* Expander background color and header */
32
+ .streamlit-expanderContent, .streamlit-expanderHeader {
33
+ background-color: #E8F5E9;
34
+ }
35
+
36
+ /* Text Area background and text color */
37
+ .stTextArea textarea {
38
+ background-color: #81C784; /* A slightly darker medium green */
39
+ color: #1B5E20; /* Dark green for text */
40
+ }
41
+
42
+ /* Button background and text color */
43
+ .stButton > button {
44
+ background-color: #81C784;
45
+ color: #1B5E20;
46
+ }
47
+
48
+ /* Warning box background and text color */
49
+ .stAlert.st-warning {
50
+ background-color: #66BB6A; /* A medium-dark green for the warning box */
51
+ color: #1B5E20;
52
+ }
53
+
54
+ /* Success box background and text color */
55
+ .stAlert.st-success {
56
+ background-color: #66BB6A; /* A medium-dark green for the success box */
57
+ color: #1B5E20;
58
+ }
59
+ </style>
60
+ """,
61
+ unsafe_allow_html=True)
62
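  # --- Page Configuration and UI Elements ---
  # NOTE(review): the st.markdown(<style>...) call added above now runs before st.set_page_config; Streamlit releases that require set_page_config to be the first Streamlit command may raise here - confirm against the deployed version or move the CSS injection below this call.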
63
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
64
  st.subheader("DataHarvest", divider="violet")
65
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
66
  st.markdown(':rainbow[**Supported Languages: English**]')
 
67
  expander = st.expander("**Important notes**")
68
  expander.write("""**Named Entities:** This DataHarvest web app predicts nine (9) labels: "person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"
69
 
 
73
 
74
  **Usage Limits:** You can request results unlimited times for one (1) month.
75
 
76
+ **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
77
+
78
+ For any errors or inquiries, please contact us at info@nlpblogs.com""")
79
 
80
  with st.sidebar:
81
  st.write("Use the following code to embed the DataHarvest web app on your website. Feel free to adjust the width and height values to fit your page.")
82
  code = '''
83
+ <iframe src="https://aiecosystem-dataharvest.hf.space" frameborder="0" width="850" height="450"
 
 
 
 
84
  ></iframe>
 
85
  '''
86
  st.code(code, language="html")
87
  st.text("")
 
89
  st.divider()
90
  st.subheader("🚀 Ready to build your own AI Web App?", divider="violet")
91
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
 
92
  # --- Comet ML Setup ---
93
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
94
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
 
96
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
97
  if not comet_initialized:
98
  st.warning("Comet ML not initialized. Check environment variables.")
 
99
  # --- Label Definitions ---
 
100
  labels = ["person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"]
 
101
  # Corrected mapping dictionary
102
  # Map each category name to the list of labels it groups
103
  category_mapping = {
104
  "People": ["person", "organization", "position"],
105
  "Locations": ["country", "city"],
106
  "Time": ["date", "time"],
107
+ "Numbers": ["money", "cardinal"]}
 
 
108
  # --- Model Loading ---
109
  @st.cache_resource
110
  def load_ner_model():
 
114
  except Exception as e:
115
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
116
  st.stop()
 
117
  model = load_ner_model()
 
118
  # Invert the mapping into a flat label -> category dictionary
119
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
 
120
  # --- Text Input and Clear Button ---
121
+ word_limit = 200
122
+ text = st.text_area(f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter", height=250, key='my_text_area')
123
+ word_count = len(text.split())
124
+ st.markdown(f"**Word count:** {word_count}/{word_limit}")
125
  def clear_text():
126
  """Clears the text area."""
127
  st.session_state['my_text_area'] = ""
 
128
  def remove_punctuation(text):
129
  """Removes punctuation from a string."""
130
  translator = str.maketrans('', '', string.punctuation)
131
  return text.translate(translator)
 
132
  st.button("Clear text", on_click=clear_text)
 
133
  # --- Results Section ---
134
  if st.button("Results"):
135
  start_time = time.time()
136
  if not text.strip():
137
  st.warning("Please enter some text to extract entities.")
138
+ elif word_count > word_limit:
139
+ st.warning(f"Your text exceeds the {word_limit} word limit. Please shorten it to continue.")
140
  else:
141
  # Remove punctuation from the input text
142
  cleaned_text = remove_punctuation(text)
 
154
  )
155
  experiment.log_parameter("input_text", text)
156
  experiment.log_table("predicted_entities", df)
 
157
  st.subheader("Grouped Entities by Category", divider = "violet")
 
158
  # Create tabs for each category
159
  category_names = sorted(list(category_mapping.keys()))
160
  category_tabs = st.tabs(category_names)
 
161
  for i, category_name in enumerate(category_names):
162
  with category_tabs[i]:
163
  df_category_filtered = df[df['category'] == category_name]
 
165
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
166
  else:
167
  st.info(f"No entities found for the '{category_name}' category.")
 
168
  with st.expander("See Glossary of tags"):
169
  st.write('''
170
  - **text**: ['entity extracted from your text data']
 
174
  - **end**: ['index of the end of the corresponding entity']
175
  ''')
176
  st.divider()
 
177
  # Tree map
178
  st.subheader("Tree map", divider = "violet")
179
  fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
180
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
181
  st.plotly_chart(fig_treemap)
 
182
  # Pie and Bar charts
183
  grouped_counts = df['category'].value_counts().reset_index()
184
  grouped_counts.columns = ['category', 'count']
185
  col1, col2 = st.columns(2)
 
186
  with col1:
187
  st.subheader("Pie chart", divider = "violet")
188
  fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
189
  fig_pie.update_traces(textposition='inside', textinfo='percent+label')
190
  fig_pie.update_layout(
191
+ )
192
  st.plotly_chart(fig_pie)
 
193
  with col2:
194
  st.subheader("Bar chart", divider = "violet")
195
  fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
196
  fig_bar.update_layout( # Changed from fig_pie to fig_bar
197
+ )
198
  st.plotly_chart(fig_bar)
 
199
  # Most Frequent Entities
200
  st.subheader("Most Frequent Entities", divider="violet")
201
  word_counts = df['text'].value_counts().reset_index()
 
209
  st.plotly_chart(fig_repeating_bar)
210
  else:
211
  st.warning("No entities were found that occur more than once.")
 
212
  # Download Section
213
  st.divider()
 
214
  dfa = pd.DataFrame(
215
  data={
216
  'Column Name': ['text', 'label', 'score', 'start', 'end'],
 
227
  with zipfile.ZipFile(buf, "w") as myzip:
228
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
229
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
 
230
  with stylable_container(
231
  key="download_button",
232
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
 
237
  file_name="nlpblogs_results.zip",
238
  mime="application/zip",
239
  )
 
240
  if comet_initialized:
241
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
242
  experiment.end()
 
246
  elapsed_time = end_time - start_time
247
  st.text("")
248
  st.text("")
249
+ st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")