AIEcosystem committed on
Commit
939f498
·
verified ·
1 Parent(s): 5d46ed5

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +142 -124
src/streamlit_app.py CHANGED
@@ -8,6 +8,8 @@ import plotly.express as px
8
  import zipfile
9
  import json
10
  import hashlib
 
 
11
  from typing import Optional
12
  from gliner import GLiNER
13
  from comet_ml import Experiment
@@ -16,6 +18,8 @@ from comet_ml import Experiment
16
  st.set_page_config(layout="wide", page_title="NER")
17
  st.subheader("HR.ai", divider="green")
18
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
 
 
19
  st.markdown(
20
  """
21
  <style>
@@ -69,9 +73,27 @@ st.markdown(
69
  }
70
  </style>
71
  """,
72
- unsafe_allow_html=True)
 
 
73
  expander = st.expander("**Important notes**")
74
- expander.write(""" **How to Use the HR.ai web app:** 1. Type or paste your text into the text area, then press Ctrl + Enter.2. Click the 'Results' button to extract and tag entities in your text data. Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags. **How to Use the Question-Answering feature:**1. Type or paste your text into the text area, then press Ctrl + Enter. 2. Click the 'Add Question' button to add your question to the Record of Questions. You can manage your questions by deleting them one by one.3. Click the 'Extract Answers' button to extract the answer to your question.Results are presented in an easy-to-read table, visualized in an interactive tree map, and is available for download. **Entities:** "Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Job_title", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"**Usage Limits:** You can request results unlimited times for one (1) month. **Supported Languages:** English **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL. For any errors or inquiries, please contact us at info@nlpblogs.com""")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  with st.sidebar:
76
  st.write("Use the following code to embed the web app on your website. Feel free to adjust the width and height values to fit your page.")
77
  code = '''
@@ -94,6 +116,7 @@ COMET_API_KEY = os.environ.get("COMET_API_KEY")
94
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
95
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
96
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 
97
  if not comet_initialized:
98
  st.warning("Comet ML not initialized. Check environment variables.")
99
 
@@ -112,6 +135,7 @@ def load_gliner_model(model_name):
112
 
113
  # --- HR_AI Model Labels and Mappings ---
114
  labels = ["Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Job_title", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"]
 
115
  category_mapping = {
116
  "Contact Information": ["Email", "Phone_number", "Street_address", "City", "Country"],
117
  "Personal Details": ["Date_of_birth", "Marital_status", "Person"],
@@ -124,7 +148,8 @@ category_mapping = {
124
  "Deductions": ["Tax", "Deductions"],
125
  "Recruitment & Sourcing": ["Interview_type", "Applicant", "Referral", "Job_board", "Recruiter"],
126
  "Legal & Compliance": ["Offer_letter", "Agreement"],
127
- "Professional_Development": ["Certification", "Skill"]}
 
128
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
129
 
130
  # --- InfoFinder Helpers ---
@@ -138,17 +163,17 @@ def get_stable_color(label):
138
 
139
  # --- Main App with Tabs ---
140
  tab1, tab2 = st.tabs(["HR.ai", "Question-Answering"])
 
141
  with tab1:
 
 
142
  # Load model for this tab
143
  model_hr = load_gliner_model("HR_AI")
 
144
  text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area_hr')
145
 
146
  def clear_text_hr():
147
  st.session_state['my_text_area_hr'] = ""
148
- st.session_state.hr_data_to_download = None
149
-
150
- if 'hr_data_to_download' not in st.session_state:
151
- st.session_state.hr_data_to_download = None
152
 
153
  st.button("Clear text", on_click=clear_text_hr, key="clear_hr")
154
 
@@ -156,7 +181,6 @@ with tab1:
156
  start_time = time.time()
157
  if not text.strip():
158
  st.warning("Please enter some text to extract entities.")
159
- st.session_state.hr_data_to_download = None
160
  else:
161
  with st.spinner("Extracting entities...", show_time=True):
162
  entities = model_hr.predict_entities(text, labels)
@@ -169,108 +193,103 @@ with tab1:
169
  experiment.log_parameter("input_text", text)
170
  experiment.log_table("predicted_entities", df)
171
 
172
- # Prepare data for download and store it in session state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  dfa = pd.DataFrame(data={'Column Name': ['text', 'label', 'score', 'start', 'end'], 'Description': ['entity extracted from your text data', 'label (tag) assigned to a given extracted entity', 'accuracy score; how accurately a tag has been assigned to a given entity', 'index of the start of the corresponding entity', 'index of the end of the corresponding entity']})
174
  buf = io.BytesIO()
175
  with zipfile.ZipFile(buf, "w") as myzip:
176
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
177
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
178
- st.session_state.hr_data_to_download = buf.getvalue()
179
 
180
- st.session_state.hr_df = df
 
 
 
 
 
 
 
 
 
181
  else:
182
- st.session_state.hr_data_to_download = None
183
  st.warning("No entities were found in the provided text.")
184
-
185
- end_time = time.time()
186
- elapsed_time = end_time - start_time
187
- st.text("")
188
- st.text("")
189
- st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")
190
-
191
- # Display logic for HR tab (always runs if data is in session state)
192
- if 'hr_df' in st.session_state and not st.session_state.hr_df.empty:
193
- df = st.session_state.hr_df
194
- st.subheader("Grouped Entities by Category", divider="green")
195
- category_names = sorted(list(category_mapping.keys()))
196
- category_tabs_hr = st.tabs(category_names)
197
- for i, category_name in enumerate(category_names):
198
- with category_tabs_hr[i]:
199
- df_category_filtered = df[df['category'] == category_name]
200
- if not df_category_filtered.empty:
201
- st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
202
- else:
203
- st.info(f"No entities found for the '{category_name}' category.")
204
-
205
- with st.expander("See Glossary of tags"):
206
- st.write('''
207
- - **text**: ['entity extracted from your text data']
208
- - **score**: ['accuracy score; how accurately a tag has been assigned to a given entity']
209
- - **label**: ['label (tag) assigned to a given extracted entity']
210
- - **start**: ['index of the start of the corresponding entity']
211
- - **end**: ['index of the end of the corresponding entity']
212
- ''')
213
- st.divider()
214
-
215
- st.subheader("Candidate Card", divider="green")
216
- fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
217
- fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
218
- st.plotly_chart(fig_treemap)
219
-
220
- col1, col2 = st.columns(2)
221
- with col1:
222
- st.subheader("Pie chart", divider="green")
223
- grouped_counts = df['category'].value_counts().reset_index()
224
- grouped_counts.columns = ['category', 'count']
225
- fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
226
- fig_pie.update_traces(textposition='inside', textinfo='percent+label')
227
- fig_pie.update_layout(paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
228
- st.plotly_chart(fig_pie)
229
-
230
- with col2:
231
- st.subheader("Bar chart", divider="green")
232
- fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
233
- fig_bar.update_layout(paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
234
- st.plotly_chart(fig_bar)
235
-
236
- st.subheader("Most Frequent Entities", divider="green")
237
- word_counts = df['text'].value_counts().reset_index()
238
- word_counts.columns = ['Entity', 'Count']
239
- repeating_entities = word_counts[word_counts['Count'] > 1]
240
- if not repeating_entities.empty:
241
- st.dataframe(repeating_entities, use_container_width=True)
242
- fig_repeating_bar = px.bar(repeating_entities, x='Entity', y='Count', color='Entity')
243
- fig_repeating_bar.update_layout(xaxis={'categoryorder': 'total descending'}, paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
244
- st.plotly_chart(fig_repeating_bar)
245
- else:
246
- st.warning("No entities were found that occur more than once.")
247
-
248
- st.divider()
249
- if st.session_state.hr_data_to_download:
250
- st.download_button(
251
- label="Download results and glossary (zip)",
252
- data=st.session_state.hr_data_to_download,
253
- file_name="nlpblogs_results.zip",
254
- mime="application/zip",
255
- )
256
- if comet_initialized:
257
- experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
258
- experiment.end()
259
 
260
  with tab2:
 
 
261
  # Load model for this tab
262
  model_qa = load_gliner_model("InfoFinder")
 
263
  user_text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area_infofinder')
264
-
265
  def clear_text_qa():
266
  st.session_state['my_text_area_infofinder'] = ""
267
- st.session_state.user_labels = []
268
- st.session_state.qa_data_to_download = None
269
 
270
  st.button("Clear text", on_click=clear_text_qa, key="clear_qa")
 
271
  st.subheader("Question-Answering", divider="green")
272
  question_input = st.text_input("Ask wh-questions. **Wh-questions begin with what, when, where, who, whom, which, whose, why and how. We use them to ask for specific information.**")
273
-
274
  if st.button("Add Question"):
275
  if question_input:
276
  if question_input not in st.session_state.user_labels:
@@ -280,10 +299,10 @@ with tab2:
280
  st.warning("This question has already been added.")
281
  else:
282
  st.warning("Please enter a question.")
283
-
284
  st.markdown("---")
285
  st.subheader("Record of Questions", divider="green")
286
-
287
  if st.session_state.user_labels:
288
  for i, label in enumerate(st.session_state.user_labels):
289
  col_list, col_delete = st.columns([0.9, 0.1])
@@ -295,9 +314,9 @@ with tab2:
295
  st.rerun()
296
  else:
297
  st.info("No questions defined yet. Use the input above to add one.")
298
-
299
  st.divider()
300
-
301
  if st.button("Extract Answers"):
302
  if not user_text.strip():
303
  st.warning("Please enter some text to analyze.")
@@ -308,7 +327,7 @@ with tab2:
308
  experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
309
  experiment.log_parameter("input_text_length", len(user_text))
310
  experiment.log_parameter("defined_labels", st.session_state.user_labels)
311
-
312
  start_time = time.time()
313
  with st.spinner("Analyzing text...", show_time=True):
314
  try:
@@ -316,45 +335,44 @@ with tab2:
316
  end_time = time.time()
317
  elapsed_time = end_time - start_time
318
  st.info(f"Processing took **{elapsed_time:.2f} seconds**.")
319
-
320
  if entities:
321
  df1 = pd.DataFrame(entities)
322
  df2 = df1[['label', 'text', 'score']]
323
  df = df2.rename(columns={'label': 'question', 'text': 'answer'})
324
- st.session_state.qa_data_to_download = df.to_csv(index=False).encode('utf-8')
325
- st.session_state.qa_df = df
326
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  if comet_initialized:
328
  experiment.log_metric("processing_time_seconds", elapsed_time)
329
  experiment.log_table("predicted_entities", df)
 
 
330
  else:
331
- st.session_state.qa_data_to_download = None
332
- st.warning("No answers were found in the text with the defined questions.")
 
333
  except Exception as e:
334
  st.error(f"An error occurred during processing: {e}")
335
  st.write(f"Error details: {e}")
336
- finally:
337
  if comet_initialized:
 
338
  experiment.end()
339
-
340
- # Display logic for QA tab (always runs if data is in session state)
341
- if 'qa_df' in st.session_state and not st.session_state.qa_df.empty:
342
- df = st.session_state.qa_df
343
- st.subheader("Extracted Answers", divider="green")
344
- st.dataframe(df, use_container_width=True)
345
- st.subheader("Tree map", divider="green")
346
- all_labels = df['question'].unique()
347
- label_color_map = {label: get_stable_color(label) for label in all_labels}
348
- fig_treemap = px.treemap(df, path=[px.Constant("all"), 'question', 'answer'], values='score', color='question', color_discrete_map=label_color_map)
349
- fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F3E5F5', plot_bgcolor='#F3E5F5')
350
- st.plotly_chart(fig_treemap)
351
-
352
- if st.session_state.qa_data_to_download:
353
- st.download_button(
354
- label="Download CSV",
355
- data=st.session_state.qa_data_to_download,
356
- file_name="nlpblogs_questions_answers.csv",
357
- mime="text/csv",
358
- )
359
- if comet_initialized:
360
- experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")
 
8
  import zipfile
9
  import json
10
  import hashlib
11
+
12
+
13
  from typing import Optional
14
  from gliner import GLiNER
15
  from comet_ml import Experiment
 
18
  st.set_page_config(layout="wide", page_title="NER")
19
  st.subheader("HR.ai", divider="green")
20
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
21
+
22
+
23
  st.markdown(
24
  """
25
  <style>
 
73
  }
74
  </style>
75
  """,
76
+ unsafe_allow_html=True
77
+ )
78
+
79
  expander = st.expander("**Important notes**")
80
+ expander.write("""
81
+ **How to Use the HR.ai web app:**
82
+ 1. Type or paste your text into the text area, then press Ctrl + Enter.
83
+ 2. Click the 'Results' button to extract and tag entities in your text data.
84
+ Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
85
+ **How to Use the Question-Answering feature:**
86
+ 1. Type or paste your text into the text area, then press Ctrl + Enter.
87
+ 2. Click the 'Add Question' button to add your question to the Record of Questions. You can manage your questions by deleting them one by one.
88
+ 3. Click the 'Extract Answers' button to extract the answer to your question.
89
+ Results are presented in an easy-to-read table, visualized in an interactive tree map, and is available for download.
90
+ **Entities:** "Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Job_title", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"
91
+ **Usage Limits:** You can request results unlimited times for one (1) month.
92
+ **Supported Languages:** English
93
+ **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
94
+ For any errors or inquiries, please contact us at info@nlpblogs.com""")
95
+
96
+
97
  with st.sidebar:
98
  st.write("Use the following code to embed the web app on your website. Feel free to adjust the width and height values to fit your page.")
99
  code = '''
 
116
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
117
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
118
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
119
+
120
  if not comet_initialized:
121
  st.warning("Comet ML not initialized. Check environment variables.")
122
 
 
135
 
136
  # --- HR_AI Model Labels and Mappings ---
137
  labels = ["Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Job_title", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"]
138
+
139
  category_mapping = {
140
  "Contact Information": ["Email", "Phone_number", "Street_address", "City", "Country"],
141
  "Personal Details": ["Date_of_birth", "Marital_status", "Person"],
 
148
  "Deductions": ["Tax", "Deductions"],
149
  "Recruitment & Sourcing": ["Interview_type", "Applicant", "Referral", "Job_board", "Recruiter"],
150
  "Legal & Compliance": ["Offer_letter", "Agreement"],
151
+ "Professional_Development": ["Certification", "Skill"]
152
+ }
153
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
154
 
155
  # --- InfoFinder Helpers ---
 
163
 
164
  # --- Main App with Tabs ---
165
  tab1, tab2 = st.tabs(["HR.ai", "Question-Answering"])
166
+
167
  with tab1:
168
+
169
+
170
  # Load model for this tab
171
  model_hr = load_gliner_model("HR_AI")
172
+
173
  text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area_hr')
174
 
175
  def clear_text_hr():
176
  st.session_state['my_text_area_hr'] = ""
 
 
 
 
177
 
178
  st.button("Clear text", on_click=clear_text_hr, key="clear_hr")
179
 
 
181
  start_time = time.time()
182
  if not text.strip():
183
  st.warning("Please enter some text to extract entities.")
 
184
  else:
185
  with st.spinner("Extracting entities...", show_time=True):
186
  entities = model_hr.predict_entities(text, labels)
 
193
  experiment.log_parameter("input_text", text)
194
  experiment.log_table("predicted_entities", df)
195
 
196
+ st.subheader("Grouped Entities by Category", divider="green")
197
+ category_names = sorted(list(category_mapping.keys()))
198
+ category_tabs_hr = st.tabs(category_names)
199
+ for i, category_name in enumerate(category_names):
200
+ with category_tabs_hr[i]:
201
+ df_category_filtered = df[df['category'] == category_name]
202
+ if not df_category_filtered.empty:
203
+ st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
204
+ else:
205
+ st.info(f"No entities found for the '{category_name}' category.")
206
+
207
+ with st.expander("See Glossary of tags"):
208
+ st.write('''
209
+ - **text**: ['entity extracted from your text data']
210
+ - **score**: ['accuracy score; how accurately a tag has been assigned to a given entity']
211
+ - **label**: ['label (tag) assigned to a given extracted entity']
212
+ - **start**: ['index of the start of the corresponding entity']
213
+ - **end**: ['index of the end of the corresponding entity']
214
+ ''')
215
+ st.divider()
216
+
217
+ st.subheader("Candidate Card", divider="green")
218
+ fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
219
+ fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
220
+ st.plotly_chart(fig_treemap)
221
+
222
+ col1, col2 = st.columns(2)
223
+ with col1:
224
+ st.subheader("Pie chart", divider="green")
225
+ grouped_counts = df['category'].value_counts().reset_index()
226
+ grouped_counts.columns = ['category', 'count']
227
+ fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
228
+ fig_pie.update_traces(textposition='inside', textinfo='percent+label')
229
+ fig_pie.update_layout(paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
230
+ st.plotly_chart(fig_pie)
231
+
232
+ with col2:
233
+ st.subheader("Bar chart", divider="green")
234
+ fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
235
+ fig_bar.update_layout(paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
236
+ st.plotly_chart(fig_bar)
237
+
238
+ st.subheader("Most Frequent Entities", divider="green")
239
+ word_counts = df['text'].value_counts().reset_index()
240
+ word_counts.columns = ['Entity', 'Count']
241
+ repeating_entities = word_counts[word_counts['Count'] > 1]
242
+ if not repeating_entities.empty:
243
+ st.dataframe(repeating_entities, use_container_width=True)
244
+ fig_repeating_bar = px.bar(repeating_entities, x='Entity', y='Count', color='Entity')
245
+ fig_repeating_bar.update_layout(xaxis={'categoryorder': 'total descending'}, paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
246
+ st.plotly_chart(fig_repeating_bar)
247
+ else:
248
+ st.warning("No entities were found that occur more than once.")
249
+
250
+ st.divider()
251
+
252
  dfa = pd.DataFrame(data={'Column Name': ['text', 'label', 'score', 'start', 'end'], 'Description': ['entity extracted from your text data', 'label (tag) assigned to a given extracted entity', 'accuracy score; how accurately a tag has been assigned to a given entity', 'index of the start of the corresponding entity', 'index of the end of the corresponding entity']})
253
  buf = io.BytesIO()
254
  with zipfile.ZipFile(buf, "w") as myzip:
255
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
256
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
 
257
 
258
+ st.download_button(
259
+ label="Download results and glossary (zip)",
260
+ data=buf.getvalue(),
261
+ file_name="nlpblogs_results.zip",
262
+ mime="application/zip",
263
+ )
264
+
265
+ if comet_initialized:
266
+ experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
267
+ experiment.end()
268
  else:
 
269
  st.warning("No entities were found in the provided text.")
270
+
271
+ end_time = time.time()
272
+ elapsed_time = end_time - start_time
273
+ st.text("")
274
+ st.text("")
275
+ st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
 
277
  with tab2:
278
+
279
+
280
  # Load model for this tab
281
  model_qa = load_gliner_model("InfoFinder")
282
+
283
  user_text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area_infofinder')
284
+
285
  def clear_text_qa():
286
  st.session_state['my_text_area_infofinder'] = ""
 
 
287
 
288
  st.button("Clear text", on_click=clear_text_qa, key="clear_qa")
289
+
290
  st.subheader("Question-Answering", divider="green")
291
  question_input = st.text_input("Ask wh-questions. **Wh-questions begin with what, when, where, who, whom, which, whose, why and how. We use them to ask for specific information.**")
292
+
293
  if st.button("Add Question"):
294
  if question_input:
295
  if question_input not in st.session_state.user_labels:
 
299
  st.warning("This question has already been added.")
300
  else:
301
  st.warning("Please enter a question.")
302
+
303
  st.markdown("---")
304
  st.subheader("Record of Questions", divider="green")
305
+
306
  if st.session_state.user_labels:
307
  for i, label in enumerate(st.session_state.user_labels):
308
  col_list, col_delete = st.columns([0.9, 0.1])
 
314
  st.rerun()
315
  else:
316
  st.info("No questions defined yet. Use the input above to add one.")
317
+
318
  st.divider()
319
+
320
  if st.button("Extract Answers"):
321
  if not user_text.strip():
322
  st.warning("Please enter some text to analyze.")
 
327
  experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
328
  experiment.log_parameter("input_text_length", len(user_text))
329
  experiment.log_parameter("defined_labels", st.session_state.user_labels)
330
+
331
  start_time = time.time()
332
  with st.spinner("Analyzing text...", show_time=True):
333
  try:
 
335
  end_time = time.time()
336
  elapsed_time = end_time - start_time
337
  st.info(f"Processing took **{elapsed_time:.2f} seconds**.")
338
+
339
  if entities:
340
  df1 = pd.DataFrame(entities)
341
  df2 = df1[['label', 'text', 'score']]
342
  df = df2.rename(columns={'label': 'question', 'text': 'answer'})
 
 
343
 
344
+ st.subheader("Extracted Answers", divider="green")
345
+ st.dataframe(df, use_container_width=True)
346
+
347
+ st.subheader("Tree map", divider="green")
348
+ all_labels = df['question'].unique()
349
+ label_color_map = {label: get_stable_color(label) for label in all_labels}
350
+ fig_treemap = px.treemap(df, path=[px.Constant("all"), 'question', 'answer'], values='score', color='question', color_discrete_map=label_color_map)
351
+ fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F3E5F5', plot_bgcolor='#F3E5F5')
352
+ st.plotly_chart(fig_treemap)
353
+
354
+ csv_data = df.to_csv(index=False).encode('utf-8')
355
+ st.download_button(
356
+ label="Download CSV",
357
+ data=csv_data,
358
+ file_name="nlpblogs_questions_answers.csv",
359
+ mime="text/csv",
360
+ )
361
+
362
  if comet_initialized:
363
  experiment.log_metric("processing_time_seconds", elapsed_time)
364
  experiment.log_table("predicted_entities", df)
365
+ experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")
366
+ experiment.end()
367
  else:
368
+ st.info("No answers were found in the text with the defined questions.")
369
+ if comet_initialized:
370
+ experiment.end()
371
  except Exception as e:
372
  st.error(f"An error occurred during processing: {e}")
373
  st.write(f"Error details: {e}")
 
374
  if comet_initialized:
375
+ experiment.log_text(f"Error: {e}")
376
  experiment.end()
377
+
378
+