AIEcosystem committed on
Commit
97d4895
·
verified ·
1 Parent(s): d1553d4

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +268 -229
src/streamlit_app.py CHANGED
@@ -7,96 +7,119 @@ import io
7
  import plotly.express as px
8
  import zipfile
9
  import json
 
 
10
  from streamlit_extras.stylable_container import stylable_container
11
  from typing import Optional
12
  from gliner import GLiNER
13
  from comet_ml import Experiment
14
- import hashlib
15
 
16
  # --- Page Configuration and UI Elements ---
17
- st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
 
 
 
18
  st.markdown(
19
  """
20
  <style>
21
- /* Main app background and text color */
22
  .stApp {
23
- background-color: #F5FFFA; /* Mint cream, a very light green */
24
- color: #000000; /* Black for the text */
25
  }
26
- /* Sidebar background color */
27
  .css-1d36184 {
28
- background-color: #B2F2B2; /* A pale green for the sidebar */
29
  secondary-background-color: #B2F2B2;
30
  }
31
- /* Expander background color */
32
  .streamlit-expanderContent {
33
  background-color: #F5FFFA;
34
  }
35
- /* Expander header background color */
36
  .streamlit-expanderHeader {
37
  background-color: #F5FFFA;
38
  }
39
- /* Text Area background and text color */
40
  .stTextArea textarea {
41
- background-color: #D4F4D4; /* A light, soft green */
42
- color: #000000; /* Black for text */
43
  }
44
- /* Text input background and text color */
45
- .stTextInput textinput {
46
- background-color: #D4F4D4; /* A light, soft green */
47
- color: #000000; /* Black for text */
48
  }
49
- /* Button background and text color */
50
  .stButton > button {
51
  background-color: #D4F4D4;
52
  color: #000000;
53
  }
54
- /* Warning box background and text color */
55
  .stAlert.st-warning {
56
- background-color: #C8F0C8; /* A light green for the warning box */
57
  color: #000000;
58
  }
59
- /* Success box background and text color */
60
  .stAlert.st-success {
61
- background-color: #C8F0C8; /* A light green for the success box */
62
  color: #000000;
63
  }
 
 
 
64
  </style>
65
  """,
66
- unsafe_allow_html=True)
67
- st.subheader("HR.ai", divider="green")
68
- st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
69
  expander = st.expander("**Important notes**")
70
- expander.write("""**Named Entities:** This HR.ai predicts thirty-five (35) labels: "Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.**Usage Limits:** You can request results unlimited times for one (1) month.**Supported Languages:** English**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL. For any errors or inquiries, please contact us at info@nlpblogs.com""")
 
 
 
 
 
 
 
71
  with st.sidebar:
72
- st.write("Use the following code to embed the HR.ai web app on your website. Feel free to adjust the width and height values to fit your page.")
73
  code = '''
74
- <iframe src="https://aiecosystem-hr-ai.hf.space" frameborder="0" width="850" height="450" ></iframe>
 
 
 
 
 
75
  '''
76
  st.code(code, language="html")
77
  st.text("")
78
  st.text("")
79
  st.divider()
80
  st.subheader("🚀 Ready to build your own AI Web App?", divider="green")
81
- st.link_button("AI Web App Builder", " https://nlpblogs.com/custom-web-app-development/", type="primary")
82
 
83
  # --- Comet ML Setup ---
84
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
85
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
86
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
87
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 
88
  if not comet_initialized:
89
  st.warning("Comet ML not initialized. Check environment variables.")
90
 
91
- # --- Label Definitions ---
92
- labels = ["Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"]
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
- # Create a mapping dictionary for labels to categories
95
  category_mapping = {
96
  "Contact Information": ["Email", "Phone_number", "Street_address", "City", "Country"],
97
  "Personal Details": ["Date_of_birth", "Marital_status", "Person"],
98
  "Employment Status": ["Full_time", "Part_time", "Contract", "Terminated", "Retired"],
99
- "Employment Information": ["Date", "Organization", "Role"],
100
  "Performance": ["Performance_score"],
101
  "Attendance": ["Leave_of_absence"],
102
  "Benefits": ["Retirement_plan", "Bonus", "Stock_options", "Health_insurance"],
@@ -104,218 +127,234 @@ category_mapping = {
104
  "Deductions": ["Tax", "Deductions"],
105
  "Recruitment & Sourcing": ["Interview_type", "Applicant", "Referral", "Job_board", "Recruiter"],
106
  "Legal & Compliance": ["Offer_letter", "Agreement"],
107
- "Professional_Development": ["Certification", "Skill"]}
 
 
108
 
109
- # --- Model Loading ---
110
- @st.cache_resource
111
- def load_ner_model():
112
- """Loads the GLiNER model and caches it."""
113
- try:
114
- return GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5", nested_ner=True, num_gen_sequences=2, gen_constraints=labels)
115
- except Exception as e:
116
- st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
117
- st.stop()
118
- model = load_ner_model()
119
 
120
- # Flatten the mapping to a single dictionary
121
- reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
 
 
122
 
123
- # --- Text Input and Clear Button ---
124
- text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area')
125
-
126
- def clear_text():
127
- """Clears the text area and session state."""
128
- st.session_state['my_text_area'] = ""
129
- # Clear stored results
130
- for key in ['df_ner', 'df_qa', 'fig_treemap', 'user_labels']:
131
- if key in st.session_state:
132
- del st.session_state[key]
133
- st.rerun()
134
-
135
- st.button("Clear text", on_click=clear_text)
136
-
137
- # --- Results Section for NER ---
138
- if st.button("Results"):
139
- start_time = time.time()
140
- if not text.strip():
141
- st.warning("Please enter some text to extract entities.")
142
- else:
143
- with st.spinner("Extracting entities...", show_time=True):
144
- entities = model.predict_entities(text, labels)
145
- df_ner = pd.DataFrame(entities)
146
- if not df_ner.empty:
147
- df_ner['category'] = df_ner['label'].map(reverse_category_mapping)
148
- st.session_state.df_ner = df_ner # Store df in session state
149
- if comet_initialized:
150
- experiment = Experiment(
151
- api_key=COMET_API_KEY,
152
- workspace=COMET_WORKSPACE,
153
- project_name=COMET_PROJECT_NAME,
154
- )
155
- experiment.log_parameter("input_text", text)
156
- experiment.log_table("predicted_entities", df_ner)
157
- else:
158
- st.warning("No entities were found in the provided text.")
159
- if 'df_ner' in st.session_state:
160
- del st.session_state.df_ner
161
-
162
- # --- Display Sections based on Session State ---
163
- if 'df_ner' in st.session_state and not st.session_state.df_ner.empty:
164
- st.subheader("Grouped Entities by Category", divider="green")
165
- category_names = sorted(list(category_mapping.keys()))
166
- category_tabs = st.tabs(category_names)
167
- for i, category_name in enumerate(category_names):
168
- with category_tabs[i]:
169
- df_category_filtered = st.session_state.df_ner[st.session_state.df_ner['category'] == category_name]
170
- if not df_category_filtered.empty:
171
- st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
172
- else:
173
- st.info(f"No entities found for the '{category_name}' category.")
174
- with st.expander("See Glossary of tags"):
175
- st.write('''
176
- - **text**: ['entity extracted from your text data']
177
- - **score**: ['accuracy score; how accurately a tag has been assigned to a given entity']
178
- - **label**: ['label (tag) assigned to a given extracted entity']
179
- - **category**: ['the high-level category for the label']
180
- - **start**: ['index of the start of the corresponding entity']
181
- - **end**: ['index of the end of the corresponding entity']
182
- ''')
183
- st.divider()
184
- st.subheader("Candidate Card", divider="green")
185
- fig_treemap = px.treemap(st.session_state.df_ner, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
186
- fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
187
- st.plotly_chart(fig_treemap)
188
- df_ner_results = st.session_state.df_ner.drop(columns=['category']) # Define df_ner_results here
189
- dfa = pd.DataFrame(
190
- data={
191
- 'Column Name': ['text', 'label', 'score', 'start', 'end', 'category'],
192
- 'Description': [
193
- 'entity extracted from your text data',
194
- 'label (tag) assigned to a given extracted entity',
195
- 'accuracy score; how accurately a tag has been assigned to a given entity',
196
- 'index of the start of the corresponding entity',
197
- 'index of the end of the corresponding entity',
198
- 'the broader category the entity belongs to',]}
199
- )
200
- buf = io.BytesIO()
201
- with zipfile.ZipFile(buf, "w") as myzip:
202
- myzip.writestr("Summary of the results.csv", df_ner_results.to_csv(index=False)) # Use df_ner_results
203
- myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
204
-
205
- with stylable_container(
206
- key="download_button",
207
- css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
208
- ):
209
- st.download_button(
210
- label="Download results and glossary (zip)",
211
- data=buf.getvalue(),
212
- file_name="nlpblogs_results.zip",
213
- mime="application/zip",)
214
 
215
- if comet_initialized:
216
- experiment = Experiment(
217
- api_key=COMET_API_KEY,
218
- workspace=COMET_WORKSPACE,
219
- project_name=COMET_PROJECT_NAME,
220
- )
221
- experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
222
- experiment.end()
223
- else:
224
- st.warning("No entities were found in the provided text.")
225
 
226
- # --- Question Answering Section ---
227
- @st.cache_resource
228
- def load_gliner_model():
229
- """Initializes and caches the GLiNER model for QA."""
230
- try:
231
- return GLiNER.from_pretrained("knowledgator/gliner-multitask-v1.0", device="cpu")
232
- except Exception as e:
233
- st.error(f"Error loading the GLiNER model: {e}")
234
- st.stop()
235
 
236
- qa_model = load_gliner_model()
237
- st.subheader("Question-Answering", divider="green")
238
- if 'user_labels' not in st.session_state:
239
- st.session_state.user_labels = []
240
 
241
- question_input = st.text_input("Ask wh-questions. **Wh-questions begin with what, when, where, who, whom, which, whose, why and how. We use them to ask for specific information.**")
242
 
243
- if st.button("Add Question"):
244
- if question_input:
245
- if question_input not in st.session_state.user_labels:
246
- st.session_state.user_labels.append(question_input)
247
- st.success(f"Added question: {question_input}")
248
  else:
249
- st.warning("This question has already been added.")
250
- else:
251
- st.warning("Please enter a question.")
252
- st.rerun()
253
-
254
- st.markdown("---")
255
- st.subheader("Record of Questions", divider="green")
256
-
257
- if st.session_state.user_labels:
258
- for i, label in enumerate(st.session_state.user_labels):
259
- col_list, col_delete = st.columns([0.9, 0.1])
260
- with col_list:
261
- st.write(f"- {label}", key=f"label_{i}")
262
- with col_delete:
263
- if st.button("Delete", key=f"delete_{i}"):
264
- st.session_state.user_labels.pop(i)
265
- st.rerun()
266
- else:
267
- st.info("No questions defined yet. Use the input above to add one.")
268
- st.divider()
269
- if st.button("Extract Answers"):
270
- if not text.strip():
271
- st.warning("Please enter some text to analyze.")
272
- elif not st.session_state.user_labels:
273
- st.warning("Please define at least one question.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  else:
275
- if comet_initialized:
276
- experiment = Experiment(
277
- api_key=COMET_API_KEY,
278
- workspace=COMET_WORKSPACE,
279
- project_name=COMET_PROJECT_NAME
280
- )
281
- experiment.log_parameter("input_text_length", len(text))
282
- experiment.log_parameter("defined_labels", st.session_state.user_labels)
283
 
284
- start_time = time.time()
285
- with st.spinner("Analyzing text...", show_time=True):
286
- try:
287
- entities = qa_model.predict_entities(text, st.session_state.user_labels)
288
- end_time = time.time()
289
- elapsed_time = end_time - start_time
290
- st.info(f"Processing took **{elapsed_time:.2f} seconds**.")
291
- if entities:
292
- df_qa = pd.DataFrame(entities)
293
- df_qa = df_qa[['label', 'text', 'score']].rename(columns={'label': 'question', 'text': 'answer'})
294
- st.session_state.df_qa = df_qa # Store QA results in session state
295
- st.subheader("Extracted Answers", divider="green")
296
- st.dataframe(df_qa, use_container_width=True)
297
- csv_data = df_qa.to_csv(index=False).encode('utf-8')
298
- with stylable_container(
299
- key="download_button_qa",
300
- css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
301
- ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
  st.download_button(
303
  label="Download CSV",
304
  data=csv_data,
305
- file_name="nlpblogs_extracted_answers.csv",
306
  mime="text/csv",
307
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
308
  if comet_initialized:
309
- experiment.log_metric("processing_time_seconds", elapsed_time)
310
- experiment.log_table("predicted_entities", df_qa)
311
- experiment.end()
312
- else:
313
- st.info("No answers were found in the text with the defined questions.")
314
- if comet_initialized:
315
  experiment.end()
316
- except Exception as e:
317
- st.error(f"An error occurred during processing: {e}")
318
- st.write(f"Error details: {e}")
319
- if comet_initialized:
320
- experiment.log_text(f"Error: {e}")
321
- experiment.end()
 
7
  import plotly.express as px
8
  import zipfile
9
  import json
10
+ import hashlib
11
+
12
  from streamlit_extras.stylable_container import stylable_container
13
  from typing import Optional
14
  from gliner import GLiNER
15
  from comet_ml import Experiment
 
16
 
17
  # --- Page Configuration and UI Elements ---
18
+ st.set_page_config(layout="wide", page_title="Combined NLP App")
19
+ st.subheader("Combined NLP App", divider="green")
20
+ st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
21
+
22
  st.markdown(
23
  """
24
  <style>
 
25
  .stApp {
26
+ background-color: #F5FFFA;
27
+ color: #000000;
28
  }
 
29
  .css-1d36184 {
30
+ background-color: #B2F2B2;
31
  secondary-background-color: #B2F2B2;
32
  }
 
33
  .streamlit-expanderContent {
34
  background-color: #F5FFFA;
35
  }
 
36
  .streamlit-expanderHeader {
37
  background-color: #F5FFFA;
38
  }
 
39
  .stTextArea textarea {
40
+ background-color: #D4F4D4;
41
+ color: #000000;
42
  }
43
+ .stTextInput input {
44
+ background-color: #B39DDB;
45
+ color: #1A0A26;
 
46
  }
 
47
  .stButton > button {
48
  background-color: #D4F4D4;
49
  color: #000000;
50
  }
 
51
  .stAlert.st-warning {
52
+ background-color: #C8F0C8;
53
  color: #000000;
54
  }
 
55
  .stAlert.st-success {
56
+ background-color: #C8F0C8;
57
  color: #000000;
58
  }
59
+ .stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
60
+ font-size: 1.25rem;
61
+ }
62
  </style>
63
  """,
64
+ unsafe_allow_html=True
65
+ )
66
+
67
  expander = st.expander("**Important notes**")
68
+ expander.write("""
69
+ This application combines two NLP tools in one place:
70
+ - **HR.ai**: Specializes in extracting 36 predefined HR-related entities from your text.
71
+ - **InfoFinder**: Answers custom wh-questions by finding relevant entities in your text.
72
+
73
+ Both tools feature interactive visualizations and data download options.
74
+ """)
75
+
76
  with st.sidebar:
77
+ st.write("Use the following code to embed the web app on your website. Feel free to adjust the width and height values to fit your page.")
78
  code = '''
79
+ <iframe
80
+ src="https://aiecosystem-hr-ai.hf.space"
81
+ frameborder="0"
82
+ width="850"
83
+ height="450"
84
+ ></iframe>
85
  '''
86
  st.code(code, language="html")
87
  st.text("")
88
  st.text("")
89
  st.divider()
90
  st.subheader("🚀 Ready to build your own AI Web App?", divider="green")
91
+ st.link_button("AI Web App Builder", "https://nlpblogs.com/custom-web-app-development/", type="primary")
92
 
93
  # --- Comet ML Setup ---
94
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
95
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
96
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
97
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
98
+
99
  if not comet_initialized:
100
  st.warning("Comet ML not initialized. Check environment variables.")
101
 
102
+ # --- Model Loading and Caching ---
103
@st.cache_resource
def load_gliner_model(model_name):
    """Load and cache the GLiNER model for one of the two app tabs.

    Args:
        model_name: "HR_AI" for the NER model (decoding constrained to the
            module-level ``labels`` list), or "InfoFinder" for the
            question-answering model.

    Returns:
        The loaded GLiNER model instance.

    Raises:
        ValueError: if ``model_name`` is not one of the two known names.
            (Previously an unknown name silently fell through and returned
            ``None``, which only crashed later at ``predict_entities``.)
    """
    if model_name not in ("HR_AI", "InfoFinder"):
        raise ValueError(f"Unknown model name: {model_name!r}")
    try:
        if model_name == "HR_AI":
            # NOTE(review): relies on the module-level `labels` list being
            # defined before the first call — it is defined below this
            # function but above the call sites in the tabs.
            return GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5", nested_ner=True, num_gen_sequences=2, gen_constraints=labels)
        # model_name == "InfoFinder"
        return GLiNER.from_pretrained("knowledgator/gliner-multitask-v1.0", device="cpu")
    except Exception as e:
        # Surface the failure in the UI and halt the script run; without a
        # model neither tab can do anything useful.
        st.error(f"Error loading the GLiNER model: {e}")
        st.stop()
114
+
115
# --- HR_AI Model Labels and Mappings ---
# The 36 entity labels predicted in the HR.ai tab; this list is also passed
# as gen_constraints when the HR_AI GLiNER model is loaded.
labels = ["Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Job_title", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"]
117
 
 
118
  category_mapping = {
119
  "Contact Information": ["Email", "Phone_number", "Street_address", "City", "Country"],
120
  "Personal Details": ["Date_of_birth", "Marital_status", "Person"],
121
  "Employment Status": ["Full_time", "Part_time", "Contract", "Terminated", "Retired"],
122
+ "Employment Information": ["Job_title", "Date", "Organization", "Role"],
123
  "Performance": ["Performance_score"],
124
  "Attendance": ["Leave_of_absence"],
125
  "Benefits": ["Retirement_plan", "Bonus", "Stock_options", "Health_insurance"],
 
127
  "Deductions": ["Tax", "Deductions"],
128
  "Recruitment & Sourcing": ["Interview_type", "Applicant", "Referral", "Job_board", "Recruiter"],
129
  "Legal & Compliance": ["Offer_letter", "Agreement"],
130
+ "Professional_Development": ["Certification", "Skill"]
131
+ }
132
+ reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
133
 
134
# --- InfoFinder Helpers ---
# Persist the user's wh-questions across Streamlit reruns; the questions
# double as the entity labels passed to the QA model in the InfoFinder tab.
if 'user_labels' not in st.session_state:
    st.session_state.user_labels = []
 
 
 
 
 
 
 
137
 
138
def get_stable_color(label):
    """Return a deterministic 6-digit hex color for *label*.

    The color is derived from the SHA-1 digest of the label text, so the
    same question always gets the same color across reruns.
    """
    digest = hashlib.sha1(label.encode('utf-8')).hexdigest()
    return f"#{digest[:6]}"
142
 
143
+ # --- Main App with Tabs ---
144
+ tab1, tab2 = st.tabs(["HR.ai (Named Entity Recognition)", "InfoFinder (Question-Answering)"])
145
+
146
+ with tab1:
147
+ st.markdown("### HR.ai: Named Entity Recognition")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
+ # Load model for this tab
150
+ model_hr = load_gliner_model("HR_AI")
 
 
 
 
 
 
 
 
151
 
152
+ text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area_hr')
 
 
 
 
 
 
 
 
153
 
154
def clear_text_hr():
    """Button callback: blank out the HR.ai tab's text-area widget state."""
    st.session_state['my_text_area_hr'] = ""
 
 
156
 
157
+ st.button("Clear text", on_click=clear_text_hr, key="clear_hr")
158
 
159
+ if st.button("Extract HR Entities"):
160
+ start_time = time.time()
161
+ if not text.strip():
162
+ st.warning("Please enter some text to extract entities.")
 
163
  else:
164
+ with st.spinner("Extracting entities...", show_time=True):
165
+ entities = model_hr.predict_entities(text, labels)
166
+ df = pd.DataFrame(entities)
167
+
168
+ if not df.empty:
169
+ df['category'] = df['label'].map(reverse_category_mapping)
170
+ if comet_initialized:
171
+ experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
172
+ experiment.log_parameter("input_text", text)
173
+ experiment.log_table("predicted_entities", df)
174
+
175
+ st.subheader("Grouped Entities by Category", divider="green")
176
+ category_names = sorted(list(category_mapping.keys()))
177
+ category_tabs_hr = st.tabs(category_names)
178
+ for i, category_name in enumerate(category_names):
179
+ with category_tabs_hr[i]:
180
+ df_category_filtered = df[df['category'] == category_name]
181
+ if not df_category_filtered.empty:
182
+ st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
183
+ else:
184
+ st.info(f"No entities found for the '{category_name}' category.")
185
+
186
+ with st.expander("See Glossary of tags"):
187
+ st.write('''
188
+ - **text**: ['entity extracted from your text data']
189
+ - **score**: ['accuracy score; how accurately a tag has been assigned to a given entity']
190
+ - **label**: ['label (tag) assigned to a given extracted entity']
191
+ - **category**: ['the high-level category for the label']
192
+ - **start**: ['index of the start of the corresponding entity']
193
+ - **end**: ['index of the end of the corresponding entity']
194
+ ''')
195
+ st.divider()
196
+
197
+ st.subheader("Tree map", divider="green")
198
+ fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
199
+ fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
200
+ st.plotly_chart(fig_treemap)
201
+
202
+ col1, col2 = st.columns(2)
203
+ with col1:
204
+ st.subheader("Pie chart", divider="green")
205
+ grouped_counts = df['category'].value_counts().reset_index()
206
+ grouped_counts.columns = ['category', 'count']
207
+ fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
208
+ fig_pie.update_traces(textposition='inside', textinfo='percent+label')
209
+ fig_pie.update_layout(paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
210
+ st.plotly_chart(fig_pie)
211
+
212
+ with col2:
213
+ st.subheader("Bar chart", divider="green")
214
+ fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
215
+ fig_bar.update_layout(paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
216
+ st.plotly_chart(fig_bar)
217
+
218
+ st.subheader("Most Frequent Entities", divider="green")
219
+ word_counts = df['text'].value_counts().reset_index()
220
+ word_counts.columns = ['Entity', 'Count']
221
+ repeating_entities = word_counts[word_counts['Count'] > 1]
222
+ if not repeating_entities.empty:
223
+ st.dataframe(repeating_entities, use_container_width=True)
224
+ fig_repeating_bar = px.bar(repeating_entities, x='Entity', y='Count', color='Entity')
225
+ fig_repeating_bar.update_layout(xaxis={'categoryorder': 'total descending'}, paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
226
+ st.plotly_chart(fig_repeating_bar)
227
+ else:
228
+ st.warning("No entities were found that occur more than once.")
229
+
230
+ st.divider()
231
+
232
+ dfa = pd.DataFrame(data={'Column Name': ['text', 'label', 'score', 'start', 'end', 'category'], 'Description': ['entity extracted from your text data', 'label (tag) assigned to a given extracted entity', 'accuracy score; how accurately a tag has been assigned to a given entity', 'index of the start of the corresponding entity', 'index of the end of the corresponding entity', 'the broader category the entity belongs to']})
233
+ buf = io.BytesIO()
234
+ with zipfile.ZipFile(buf, "w") as myzip:
235
+ myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
236
+ myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
237
+
238
+ st.download_button(
239
+ label="Download results and glossary (zip)",
240
+ data=buf.getvalue(),
241
+ file_name="nlpblogs_hr_results.zip",
242
+ mime="application/zip",
243
+ )
244
+
245
+ if comet_initialized:
246
+ experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
247
+ experiment.end()
248
+ else:
249
+ st.warning("No entities were found in the provided text.")
250
+
251
+ end_time = time.time()
252
+ elapsed_time = end_time - start_time
253
+ st.text("")
254
+ st.text("")
255
+ st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")
256
+
257
+ with tab2:
258
+ st.markdown("### InfoFinder: Question-Answering")
259
+
260
+ # Load model for this tab
261
+ model_qa = load_gliner_model("InfoFinder")
262
+
263
+ user_text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area_infofinder')
264
+
265
def clear_text_qa():
    """Button callback: blank out the InfoFinder tab's text-area widget state."""
    st.session_state['my_text_area_infofinder'] = ""
267
+
268
+ st.button("Clear text", on_click=clear_text_qa, key="clear_qa")
269
+
270
+ st.subheader("Question-Answering", divider="violet")
271
+ question_input = st.text_input("Ask wh-questions. **Wh-questions begin with what, when, where, who, whom, which, whose, why and how. We use them to ask for specific information.**")
272
+
273
+ if st.button("Add Question"):
274
+ if question_input:
275
+ if question_input not in st.session_state.user_labels:
276
+ st.session_state.user_labels.append(question_input)
277
+ st.success(f"Added question: {question_input}")
278
+ else:
279
+ st.warning("This question has already been added.")
280
+ else:
281
+ st.warning("Please enter a question.")
282
+
283
+ st.markdown("---")
284
+ st.subheader("Record of Questions", divider="violet")
285
+
286
+ if st.session_state.user_labels:
287
+ for i, label in enumerate(st.session_state.user_labels):
288
+ col_list, col_delete = st.columns([0.9, 0.1])
289
+ with col_list:
290
+ st.write(f"- {label}", key=f"label_{i}")
291
+ with col_delete:
292
+ if st.button("Delete", key=f"delete_{i}"):
293
+ st.session_state.user_labels.pop(i)
294
+ st.rerun()
295
  else:
296
+ st.info("No questions defined yet. Use the input above to add one.")
 
 
 
 
 
 
 
297
 
298
+ st.divider()
299
+
300
+ if st.button("Extract Answers"):
301
+ if not user_text.strip():
302
+ st.warning("Please enter some text to analyze.")
303
+ elif not st.session_state.user_labels:
304
+ st.warning("Please define at least one question.")
305
+ else:
306
+ if comet_initialized:
307
+ experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
308
+ experiment.log_parameter("input_text_length", len(user_text))
309
+ experiment.log_parameter("defined_labels", st.session_state.user_labels)
310
+
311
+ start_time = time.time()
312
+ with st.spinner("Analyzing text...", show_time=True):
313
+ try:
314
+ entities = model_qa.predict_entities(user_text, st.session_state.user_labels)
315
+ end_time = time.time()
316
+ elapsed_time = end_time - start_time
317
+ st.info(f"Processing took **{elapsed_time:.2f} seconds**.")
318
+
319
+ if entities:
320
+ df1 = pd.DataFrame(entities)
321
+ df2 = df1[['label', 'text', 'score']]
322
+ df = df2.rename(columns={'label': 'question', 'text': 'answer'})
323
+
324
+ st.subheader("Extracted Answers", divider="violet")
325
+ st.dataframe(df, use_container_width=True)
326
+
327
+ st.subheader("Tree map", divider="violet")
328
+ all_labels = df['question'].unique()
329
+ label_color_map = {label: get_stable_color(label) for label in all_labels}
330
+ fig_treemap = px.treemap(df, path=[px.Constant("all"), 'question', 'answer'], values='score', color='question', color_discrete_map=label_color_map)
331
+ fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F3E5F5', plot_bgcolor='#F3E5F5')
332
+ st.plotly_chart(fig_treemap)
333
+
334
+ csv_data = df.to_csv(index=False).encode('utf-8')
335
  st.download_button(
336
  label="Download CSV",
337
  data=csv_data,
338
+ file_name="nlpblogs_infofinder_results.csv",
339
  mime="text/csv",
340
  )
341
+
342
+ if comet_initialized:
343
+ experiment.log_metric("processing_time_seconds", elapsed_time)
344
+ experiment.log_table("predicted_entities", df)
345
+ experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")
346
+ experiment.end()
347
+ else:
348
+ st.info("No answers were found in the text with the defined questions.")
349
+ if comet_initialized:
350
+ experiment.end()
351
+ except Exception as e:
352
+ st.error(f"An error occurred during processing: {e}")
353
+ st.write(f"Error details: {e}")
354
  if comet_initialized:
355
+ experiment.log_text(f"Error: {e}")
 
 
 
 
 
356
  experiment.end()
357
+
358
+
359
+
360
+