AIEcosystem committed on
Commit
1c2d55a
·
verified ·
1 Parent(s): 1ec2a68

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +21 -64
src/streamlit_app.py CHANGED
@@ -1,7 +1,5 @@
1
-
2
  import os
3
  os.environ['HF_HOME'] = '/tmp'
4
-
5
  import time
6
  import streamlit as st
7
  import pandas as pd
@@ -15,14 +13,11 @@ from comet_ml import Experiment
15
  # --- App Configuration and Styling ---
16
  st.set_page_config(
17
  layout="wide",
18
- page_title="English Keyphrase"
19
- )
20
-
21
  st.markdown(
22
  """
23
  <style>
24
-
25
- .stApp {
26
  background-color: #f0f8ff; /* A single, solid color */
27
  color: #000000;
28
  font-family: 'Inter', sans-serif;
@@ -52,45 +47,26 @@ st.markdown(
52
 
53
  </style>
54
  """,
55
- unsafe_allow_html=True
56
- )
57
-
58
  # --- Comet ML Setup ---
59
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
60
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
61
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
62
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
63
-
64
  if not comet_initialized:
65
  st.warning("Comet ML not initialized. Check environment variables.")
66
-
67
  # --- UI Header and Notes ---
68
  st.subheader("AcademiaMiner", divider="rainbow")
69
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
70
  expander = st.expander("**Important notes**")
71
  expander.write('''**Entities:** This AcademiaMiner extracts keyphrases from English academic and scientific papers.
72
-
73
- Results are presented in easy-to-read tables, visualized in an interactive tree map and a bar chart, and are available for download along with a Glossary of tags.
74
-
75
- **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.
76
-
77
- **Usage Limits:** You can request results unlimited times for one (1) month.
78
-
79
- **Supported Languages:** English
80
-
81
- **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
82
-
83
- For any errors or inquiries, please contact us at info@nlpblogs.com''')
84
-
85
  with st.sidebar:
86
  st.write("Use the following code to embed the AcademiaMiner web app on your website. Feel free to adjust the width and height values to fit your page.")
87
  code = '''
88
- <iframe
89
- src="https://aiecosystem-academiaminer.hf.space"
90
- frameborder="0"
91
- width="850"
92
- height="450"
93
- ></iframe>
94
  '''
95
  st.code(code, language="html")
96
  st.text("")
@@ -98,7 +74,6 @@ with st.sidebar:
98
  st.divider()
99
  st.subheader("🚀 Ready to build your own AI Web App?", divider="rainbow")
100
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
101
-
102
  # --- Model Loading ---
103
  @st.cache_resource
104
  def load_ner_model():
@@ -112,25 +87,28 @@ def load_ner_model():
112
  except Exception as e:
113
  st.error(f"Failed to load NER model: {e}")
114
  st.stop()
115
-
116
  model = load_ner_model()
117
-
118
  # --- Main App Logic ---
119
- text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area')
120
-
 
 
 
 
 
121
  def clear_text():
122
  """Clears the text area."""
123
  st.session_state['my_text_area'] = ""
124
  st.session_state.text_processed = False
125
-
126
  st.button("Clear text", on_click=clear_text)
127
-
128
  if st.button("Results"):
 
129
  if not text.strip():
130
  st.warning("Please enter some text to extract keyphrases.")
 
 
131
  else:
132
  start_time_overall = time.time()
133
-
134
  # Initialize Comet ML experiment at the start
135
  experiment = None
136
  if comet_initialized:
@@ -143,12 +121,10 @@ if st.button("Results"):
143
  except Exception as e:
144
  st.warning(f"Could not initialize Comet ML experiment: {e}")
145
  experiment = None
146
-
147
  try:
148
  with st.spinner("Analyzing text...", ):
149
  # The pipeline model returns a list of dictionaries.
150
  entities = model(text)
151
-
152
  data = []
153
  for entity in entities:
154
  # 'ml6team/keyphrase-extraction-kbir-inspec' model doesn't have 'entity_group'
@@ -160,41 +136,30 @@ if st.button("Results"):
160
  'start': entity['start'],
161
  'end': entity['end']
162
  })
163
-
164
-
165
  if not data:
166
  st.warning("No keyphrases found in the text.")
167
  st.stop()
168
-
169
  df = pd.DataFrame(data)
170
-
171
  # --- Data Cleaning and Processing ---
172
  pattern = r'[^\w\s]'
173
  df['word'] = df['word'].replace(pattern, '', regex=True)
174
  df = df.replace('', 'Unknown')
175
-
176
  # --- All Extracted Keyphrases ---
177
  st.subheader("All Extracted Keyphrases", divider="rainbow")
178
  st.dataframe(df, use_container_width=True)
179
  with st.expander("See Glossary of tags"):
180
  st.write('''
181
  **word**: ['keyphrase extracted from your text data']
182
-
183
  **score**: ['accuracy score; how accurately a tag has been assigned']
184
-
185
  **label**: ['label (tag) assigned to a given extracted keyphrase']
186
-
187
  **start**: ['index of the start of the corresponding entity']
188
-
189
  **end**: ['index of the end of the corresponding entity']
190
  ''')
191
-
192
  # --- Most Frequent Keyphrases ---
193
  st.subheader("Most Frequent Keyphrases", divider="rainbow")
194
  word_counts = df['word'].value_counts().reset_index()
195
  word_counts.columns = ['word', 'count']
196
  df_frequent = word_counts.sort_values(by='count', ascending=False).head(15)
197
-
198
  if not df_frequent.empty:
199
  tab1, tab2 = st.tabs(["Table", "Chart"])
200
  with tab1:
@@ -214,13 +179,11 @@ if st.button("Results"):
214
  paper_bgcolor='#f0f8ff', # Sets the background color of the entire figure
215
  plot_bgcolor='#f0f8ff' # Sets the background color of the plotting area
216
  )
217
-
218
  st.plotly_chart(fig_frequent_bar, use_container_width=True)
219
  if experiment:
220
  experiment.log_figure(figure=fig_frequent_bar, figure_name="frequent_keyphrases_bar_chart")
221
  else:
222
  st.info("No keyphrases found with more than one occurrence.")
223
-
224
  # --- Treemap of All Keyphrases ---
225
  st.subheader("Treemap of All Keyphrases", divider="rainbow")
226
  # Use 'label' instead of 'entity_group'
@@ -235,7 +198,6 @@ if st.button("Results"):
235
  st.plotly_chart(fig_treemap, use_container_width=True)
236
  if experiment:
237
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")
238
-
239
  # --- Download Section ---
240
  dfa = pd.DataFrame(
241
  data={
@@ -254,7 +216,6 @@ if st.button("Results"):
254
  myzip.writestr("Summary_of_results.csv", df.to_csv(index=False))
255
  myzip.writestr("Most_frequent_keyphrases.csv", df_frequent.to_csv(index=False))
256
  myzip.writestr("Glossary_of_tags.csv", dfa.to_csv(index=False))
257
-
258
  with stylable_container(
259
  key="download_button",
260
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
@@ -266,7 +227,6 @@ if st.button("Results"):
266
  mime="application/zip",
267
  )
268
  st.divider()
269
-
270
  except Exception as e:
271
  st.error(f"An unexpected error occurred during processing: {e}")
272
  finally:
@@ -279,10 +239,7 @@ if st.button("Results"):
279
  experiment.end()
280
  except Exception as comet_e:
281
  st.warning(f"Comet ML experiment.end() failed: {comet_e}")
282
-
283
- # Show elapsed time
284
- end_time_overall = time.time()
285
- elapsed_time_overall = end_time_overall - start_time_overall
286
- st.info(f"Results processed in **{elapsed_time_overall:.2f} seconds**.")
287
-
288
-
 
 
1
  import os
2
  os.environ['HF_HOME'] = '/tmp'
 
3
  import time
4
  import streamlit as st
5
  import pandas as pd
 
13
  # --- App Configuration and Styling ---
14
  st.set_page_config(
15
  layout="wide",
16
+ page_title="English Keyphrase")
 
 
17
  st.markdown(
18
  """
19
  <style>
20
+ .stApp {
 
21
  background-color: #f0f8ff; /* A single, solid color */
22
  color: #000000;
23
  font-family: 'Inter', sans-serif;
 
47
 
48
  </style>
49
  """,
50
+ unsafe_allow_html=True)
 
 
51
  # --- Comet ML Setup ---
52
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
53
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
54
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
55
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 
56
  if not comet_initialized:
57
  st.warning("Comet ML not initialized. Check environment variables.")
 
58
  # --- UI Header and Notes ---
59
  st.subheader("AcademiaMiner", divider="rainbow")
60
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
61
  expander = st.expander("**Important notes**")
62
  expander.write('''**Entities:** This AcademiaMiner extracts keyphrases from English academic and scientific papers.
63
+ Results are presented in easy-to-read tables, visualized in an interactive tree map and a bar chart, and are available for download along with a Glossary of tags.
64
+ **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.**Usage Limits:** You can request results unlimited times for one (1) month.**Supported Languages:** English**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL. For any errors or inquiries, please contact us at info@nlpblogs.com''')
 
 
 
 
 
 
 
 
 
 
 
65
  with st.sidebar:
66
  st.write("Use the following code to embed the AcademiaMiner web app on your website. Feel free to adjust the width and height values to fit your page.")
67
  code = '''
68
+ <iframe src="https://aiecosystem-academiaminer.hf.space" frameborder="0" width="850" height="450"
69
+ ></iframe>
 
 
 
 
70
  '''
71
  st.code(code, language="html")
72
  st.text("")
 
74
  st.divider()
75
  st.subheader("🚀 Ready to build your own AI Web App?", divider="rainbow")
76
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
 
77
  # --- Model Loading ---
78
  @st.cache_resource
79
  def load_ner_model():
 
87
  except Exception as e:
88
  st.error(f"Failed to load NER model: {e}")
89
  st.stop()
 
90
  model = load_ner_model()
 
91
  # --- Main App Logic ---
92
+ # Define the word limit
93
+ word_limit = 200
94
+ # Update text area with the word limit
95
+ text = st.text_area(f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter", height=250, key='my_text_area')
96
+ # Calculate and display the word count
97
+ word_count = len(text.split())
98
+ st.markdown(f"**Word count:** {word_count}/{word_limit}")
99
  def clear_text():
100
  """Clears the text area."""
101
  st.session_state['my_text_area'] = ""
102
  st.session_state.text_processed = False
 
103
  st.button("Clear text", on_click=clear_text)
 
104
  if st.button("Results"):
105
+ # Check for word limit and empty text first
106
  if not text.strip():
107
  st.warning("Please enter some text to extract keyphrases.")
108
+ elif word_count > word_limit:
109
+ st.warning(f"Your text exceeds the {word_limit} word limit. Please shorten it to continue.")
110
  else:
111
  start_time_overall = time.time()
 
112
  # Initialize Comet ML experiment at the start
113
  experiment = None
114
  if comet_initialized:
 
121
  except Exception as e:
122
  st.warning(f"Could not initialize Comet ML experiment: {e}")
123
  experiment = None
 
124
  try:
125
  with st.spinner("Analyzing text...", ):
126
  # The pipeline model returns a list of dictionaries.
127
  entities = model(text)
 
128
  data = []
129
  for entity in entities:
130
  # 'ml6team/keyphrase-extraction-kbir-inspec' model doesn't have 'entity_group'
 
136
  'start': entity['start'],
137
  'end': entity['end']
138
  })
 
 
139
  if not data:
140
  st.warning("No keyphrases found in the text.")
141
  st.stop()
 
142
  df = pd.DataFrame(data)
 
143
  # --- Data Cleaning and Processing ---
144
  pattern = r'[^\w\s]'
145
  df['word'] = df['word'].replace(pattern, '', regex=True)
146
  df = df.replace('', 'Unknown')
 
147
  # --- All Extracted Keyphrases ---
148
  st.subheader("All Extracted Keyphrases", divider="rainbow")
149
  st.dataframe(df, use_container_width=True)
150
  with st.expander("See Glossary of tags"):
151
  st.write('''
152
  **word**: ['keyphrase extracted from your text data']
 
153
  **score**: ['accuracy score; how accurately a tag has been assigned']
 
154
  **label**: ['label (tag) assigned to a given extracted keyphrase']
 
155
  **start**: ['index of the start of the corresponding entity']
 
156
  **end**: ['index of the end of the corresponding entity']
157
  ''')
 
158
  # --- Most Frequent Keyphrases ---
159
  st.subheader("Most Frequent Keyphrases", divider="rainbow")
160
  word_counts = df['word'].value_counts().reset_index()
161
  word_counts.columns = ['word', 'count']
162
  df_frequent = word_counts.sort_values(by='count', ascending=False).head(15)
 
163
  if not df_frequent.empty:
164
  tab1, tab2 = st.tabs(["Table", "Chart"])
165
  with tab1:
 
179
  paper_bgcolor='#f0f8ff', # Sets the background color of the entire figure
180
  plot_bgcolor='#f0f8ff' # Sets the background color of the plotting area
181
  )
 
182
  st.plotly_chart(fig_frequent_bar, use_container_width=True)
183
  if experiment:
184
  experiment.log_figure(figure=fig_frequent_bar, figure_name="frequent_keyphrases_bar_chart")
185
  else:
186
  st.info("No keyphrases found with more than one occurrence.")
 
187
  # --- Treemap of All Keyphrases ---
188
  st.subheader("Treemap of All Keyphrases", divider="rainbow")
189
  # Use 'label' instead of 'entity_group'
 
198
  st.plotly_chart(fig_treemap, use_container_width=True)
199
  if experiment:
200
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")
 
201
  # --- Download Section ---
202
  dfa = pd.DataFrame(
203
  data={
 
216
  myzip.writestr("Summary_of_results.csv", df.to_csv(index=False))
217
  myzip.writestr("Most_frequent_keyphrases.csv", df_frequent.to_csv(index=False))
218
  myzip.writestr("Glossary_of_tags.csv", dfa.to_csv(index=False))
 
219
  with stylable_container(
220
  key="download_button",
221
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
 
227
  mime="application/zip",
228
  )
229
  st.divider()
 
230
  except Exception as e:
231
  st.error(f"An unexpected error occurred during processing: {e}")
232
  finally:
 
239
  experiment.end()
240
  except Exception as comet_e:
241
  st.warning(f"Comet ML experiment.end() failed: {comet_e}")
242
+ # Show elapsed time
243
+ end_time_overall = time.time()
244
+ elapsed_time_overall = end_time_overall - start_time_overall
245
+ st.info(f"Results processed in **{elapsed_time_overall:.2f} seconds**.")