pokameswaran commited on
Commit
5efc535
·
1 Parent(s): 98d6da2

Added files related to the app

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/chromadb/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
chatbot.py ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dependencies
2
+ import logging
3
+ import streamlit as st
4
+ from streamlit_mic_recorder import speech_to_text
5
+ from pathlib import Path
6
+ from chatbot_functionalities.generate_questions import generate_questions
7
+ from chatbot_functionalities.vectordb_operations import get_collection_from_vector_db
8
+ from chatbot_functionalities.evaluate_answers import evaluate_all_answers, get_overall_feedback
9
+
# enable logging
# basicConfig applies the INFO threshold to the root logger for the whole app
logging.basicConfig(level=logging.INFO)
# module-level logger; all app log records are tagged "simple-chatbot"
logger = logging.getLogger("simple-chatbot")
13
+
14
+
15
# function to initialize web app for the first time
def initialize_app():
    """Performs processing that should happen upon loading of the web app and
    sets all session state variables to their desired initial state.

    Side effects:
        Writes all `p01_*` flags and interview-tracking variables into
        `st.session_state` and loads the question collection from ChromaDB.
    """
    # set status flags to their desired initial state
    st.session_state.p01_show_mock_interview = False
    st.session_state.p01_profile_details_taken = False
    st.session_state.p01_questions_generated = False
    st.session_state.p01_record_answer_disabled = False
    st.session_state.p01_start_mock_interview_disabled = False

    # initialize variables related to question and interview history
    st.session_state.p01_current_question = None
    # -1 marks "profile question" stage; real questions are indexed from 0
    st.session_state.p01_current_question_index = -1
    st.session_state.p01_questions_count = 0
    st.session_state.p01_interview_history = []

    # first question that will be asked to every candidate
    # this can be replaced with CV summarization component
    st.session_state.p01_candidate_profile_question = (
        "Please provide a brief summary about your education background and prior work experience "
        "that may be relevant to the chosen job position."
    )

    # instruction that will be printed before the microphone button
    # FIX: a space was missing between the second and third sentences,
    # rendering as "...configured correctly.Press the 'Record Answer'..."
    st.session_state.p01_recording_instructions = (
        "All responses will be captured through the microphone available on your device. "
        "Ensure that the microphone is working and configured correctly. "
        "Press the 'Record Answer' button and start speaking on the microphone after 1 second."
    )

    # fetch the necessary collections from vector db
    # str(...) is the idiomatic spelling of the previous Path.__str__() call
    st.session_state.p01_questions_collection = get_collection_from_vector_db(
        vdb_path=str(Path.cwd() / "data" / "chromadb"),
        collection_name="question_collection",
    )

    # set the flag that indicates initialization is done
    # this flag is crucial and should be set as the very last step in this function as
    # the web app invokes this function only when this variable is not set
    st.session_state.p01_init_complete = True
57
+
58
+
59
def load_interview_questions():
    """Helper function to call the question generation module.

    Generates the question set from the candidate's spoken profile summary
    (the second entry in the interview history), records the question count,
    and flips the generation flags. A no-op when questions already exist for
    this session.
    """
    # nothing to do if this session already has its questions
    if st.session_state.p01_questions_generated:
        return

    # use candidate provided profile summary and generate subsequent questions to be asked
    questions_df = generate_questions(
        position=st.session_state.p01_job_position,
        candidate_profile=st.session_state.p01_interview_history[1]["content"],
        question_collection=st.session_state.p01_questions_collection,
    )
    st.session_state.p01_questions_df = questions_df

    # set questions count (number of rows in the generated dataframe)
    st.session_state.p01_questions_count = len(questions_df)

    # set flags: questions are ready, interview has not yet concluded
    st.session_state.p01_questions_generated = True
    st.session_state.p01_mock_interview_concluded = False
77
+
78
+
79
# function(s) to process user interactions
def start_mock_interview():
    """Resets mock interview section of the app and adds the question to
    collect candidate profile details.

    Side effects:
        Clears the interview history and feedback, re-enables answer
        recording, disables the start button while the interview runs,
        and sets the current question to the profile-summary prompt.
    """
    st.session_state.p01_show_mock_interview = True
    st.session_state.p01_questions_generated = False
    st.session_state.p01_interview_history = []
    st.session_state.p01_record_answer_disabled = False
    st.session_state.p01_start_mock_interview_disabled = True
    st.session_state.overall_feedback = None

    # set current question to candidate profile request question
    # (the previous `[:]` slice copy was a no-op: strings are immutable)
    st.session_state.p01_current_question = (
        st.session_state.p01_candidate_profile_question
    )
96
+
97
def speech_recognition_callback():
    """Handles a completed speech-to-text capture from the microphone widget.

    Validates the transcription, appends the question/answer pair to the
    interview history, triggers question generation after the first (profile)
    answer, stores the answer against the current question, then advances to
    the next question or concludes the interview when none remain.
    """
    # transcription failed or nothing was captured — ask the candidate to retry
    # FIX: corrected user-facing typo "reponse" -> "response"
    if st.session_state.my_stt_output is None:
        st.session_state.p01_error_message = "Please record your response again."
        return

    st.session_state.p01_error_message = None

    st.session_state.p01_last_candidate_response = st.session_state.my_stt_output

    # if code reaches this point, then a response was successfully captured and transcribed
    # append current question and the utterance from the candidate to interview history
    st.session_state.p01_interview_history.append(
        dict(role="assistant", content=st.session_state.p01_current_question)
    )
    st.session_state.p01_interview_history.append(
        dict(role="user", content=st.session_state.my_stt_output)
    )

    # generate questions if not already done
    # this is done here instead of the 'Start Mock Interview' button because the
    # CV summarization component is not ready and we need to ask the candidate
    # to give a profile summary as part of the first question
    if not st.session_state.p01_questions_generated:
        with st.spinner("Preparing questions for your mock interview"):
            load_interview_questions()

    # add answer to the questions dataframe
    # index -1 is the profile-summary stage, which is intentionally not stored
    if st.session_state.p01_current_question_index > -1:
        st.session_state.p01_questions_df.loc[
            st.session_state.p01_current_question_index, 'answer'
        ] = st.session_state.my_stt_output

    # change current question to the next available question
    # check if there are any more question(s) to be asked
    if (
        st.session_state.p01_current_question_index
        < st.session_state.p01_questions_count - 1
    ):
        st.session_state.p01_current_question_index += 1
        st.session_state.p01_current_question = (
            st.session_state.p01_questions_df.iloc[
                st.session_state.p01_current_question_index
            ].question
        )
    # no more questions to be asked
    else:
        st.session_state.p01_current_question = "Your mock interview is over"
        st.session_state.p01_record_answer_disabled = True
        st.session_state.p01_start_mock_interview_disabled = False
        st.session_state.p01_mock_interview_concluded = True

    # since the state update is async, the displayed question would not refresh
    # on its own; force a rerun so the UI shows the new question
    st.experimental_rerun()
150
+
151
def get_feedback():
    """Evaluates all recorded answers and stores the overall feedback.

    Runs the answer-evaluation pipeline over the questions dataframe, then
    saves the aggregate feedback into `st.session_state.overall_feedback`
    so the Results tab can render it.
    """
    questions_df = st.session_state.p01_questions_df
    evaluate_all_answers(
        interview_history=questions_df,
        questions_collection=st.session_state.p01_questions_collection,
    )
    st.session_state.overall_feedback = get_overall_feedback()
159
+
160
# function for rendering the main web application
def run_web_app():
    """Renders the web application, captures user actions and
    invokes appropriate event specific callbacks.

    Layout: a sidebar for candidate profile inputs plus three tabs —
    Q&A (live interview), History (transcript), Results (feedback).
    """

    # page or window title - this shows up as browser window title
    st.set_page_config(page_title="Interview Preparation Assistant")

    # call initialization function (only for the first time)
    if "p01_init_complete" not in st.session_state:
        initialize_app()

    # setup sidebar
    # sidebar title
    st.sidebar.markdown(
        "<h4 style='color: orange;'>Candidate Profile</h4>",
        unsafe_allow_html=True,
    )

    # user input field to capture name of the candidate
    candidate_name = st.sidebar.text_input(
        label="Candidate Name",
        placeholder="Enter your name",
        key="p01_candidate_name",
    )

    # list of allowed values for job position
    # FIX: "Marketing Manager" previously carried a trailing space, so it could
    # never satisfy the `position == "Marketing Manager"` check in the
    # answer-evaluation module and silently fell into the default branch
    job_position_options = [
        "Customer Service Representative",
        "Sales Manager",
        "Marketing Manager",
        "Nurse",
        "Medical Assistance",
    ]
    # user input field to capture job position for which candidate wants to prepare
    job_position = st.sidebar.selectbox(
        label="Job Position",
        placeholder="Select a job position",
        options=job_position_options,
        key="p01_job_position",
    )

    # button to start mock interview (disabled while an interview is running)
    st.sidebar.button(
        label="Start Mock Interview",
        on_click=start_mock_interview,
        disabled=st.session_state.p01_start_mock_interview_disabled,
        key="p01_start_mock_interview",
    )

    # setup tabs
    tab1, tab2, tab3 = st.tabs(["Q&A", "History", "Results"])

    # render mock interview section in tab 1
    if st.session_state.p01_show_mock_interview:
        with tab1:
            # set page heading (this is a title for the main section of the app)
            p01_interview_section_title = (
                f"Mock Interview for {st.session_state.p01_job_position}"
            )
            with st.container():
                st.markdown(
                    f"<h4 style='color: orange;'>{p01_interview_section_title}</h4>",
                    unsafe_allow_html=True,
                )

            # current question section
            with st.container():
                p01_current_question_title = "Current Question"
                with st.container():
                    st.markdown(
                        f"<h6 style='color: orange;'>{p01_current_question_title}</h6>",
                        unsafe_allow_html=True,
                    )
                with st.chat_message("assistant"):
                    st.markdown(st.session_state.p01_current_question)

            # button to start recording (shown only while an interview is running,
            # i.e. while the sidebar start button is disabled)
            if st.session_state.get("p01_start_mock_interview_disabled") is True:
                with st.spinner():
                    speech_to_text(
                        key='my_stt',
                        callback=speech_recognition_callback,
                    )

            # error message section
            if st.session_state.get("p01_error_message") is not None:
                with st.container():
                    st.error(st.session_state.p01_error_message)

    # render interview history in tab 2
    with tab2:
        # loop through interview history and show the messages if they exist
        p01_interview_history_title = "Interview History"
        with st.container():
            st.markdown(
                f"<h4 style='color: orange;'>{p01_interview_history_title}</h4>",
                unsafe_allow_html=True,
            )
        # newest exchange first
        for message in st.session_state.p01_interview_history[::-1]:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])

    # render evaluation results and feedback in tab 3
    with tab3:
        # loop through evaluation results and show the results if they exist
        p01_interview_evaluation_title = "Evaluation Results & Feedback"
        with st.container():
            st.markdown(
                f"<h4 style='color: orange;'>{p01_interview_evaluation_title}</h4>",
                unsafe_allow_html=True,
            )

        # feedback can only be requested once the interview has concluded
        if st.session_state.get("p01_mock_interview_concluded") is True:
            st.button(
                label="Get Feedback",
                type="primary",
                on_click=get_feedback,
                key="p01_get_feedback",
            )

        if st.session_state.get("overall_feedback") is not None:
            if "p01_questions_df" in st.session_state:
                st.markdown(
                    "<h6 style='color: orange;'>Question Level Feedback</h6>",
                    unsafe_allow_html=True,
                )
                # three-column header row for the per-question feedback table
                with st.container():
                    col1, col2, col3 = st.columns(3)
                    with col1:
                        st.markdown(
                            "<h6 style='color: red;'>Question</h6>",
                            unsafe_allow_html=True,
                        )
                    with col2:
                        st.markdown(
                            "<h6 style='color: red;'>Answer</h6>",
                            unsafe_allow_html=True,
                        )
                    with col3:
                        st.markdown(
                            "<h6 style='color: red;'>Rating & Feedback</h6>",
                            unsafe_allow_html=True,
                        )

                # one row per evaluated question
                for row in st.session_state.p01_questions_df.itertuples():
                    with st.container():
                        col1, col2, col3 = st.columns(3)
                        with col1:
                            st.markdown(row.question)
                        with col2:
                            st.markdown(row.answer)
                        with col3:
                            st.markdown(row.feedback)

            with st.container():
                st.markdown(
                    "<h6 style='color: orange;'>Overall Feedback</h6>",
                    unsafe_allow_html=True,
                )
            with st.chat_message("assistant"):
                st.markdown("This functionality will be available in next release.")
326
+
327
+
328
# call the function to render the main web application
# (guarded so importing this module does not launch the app)
if __name__ == "__main__":
    run_web_app()
chatbot_functionalities/__init__.py ADDED
File without changes
chatbot_functionalities/answer_evaluation.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain import FewShotPromptTemplate
2
+ from chatbot_functionalities.llms import llm_inference
3
+
4
+
5
+ def evaluate_answer(
6
+ question: str,
7
+ answer: str,
8
+ position: str,
9
+ ) -> str:
10
+ """Call HuggingFace/OpenAI model for inference
11
+
12
+ Given a question,answer, and position , this function calls the relevant
13
+ API to fetch LLM inference results.
14
+
15
+ Args:
16
+ question: The generated question from our database
17
+ answer: answer given by the candidate
18
+ position: job position that the candidate applying for
19
+
20
+
21
+ Returns:
22
+ Rating: rating for candidate's answer .
23
+ qualitative_feedback : based on the candidate's answer and the given rating.
24
+
25
+ HuggingFace repo_id example:
26
+ - mistralai/Mistral-7B-Instruct-v0.1
27
+
28
+ """
29
+ if position == "Customer Service Representative":
30
+ #set up examples
31
+ examples = [
32
+ {
33
+ "position": f"""{position}""",
34
+ "question": """How can you improve a dissatisfied customer's experience?""",
35
+ "answer": """I've found the most successful strategy for turning an unhappy customer into a happy customer is by actively listening to what they're saying. Sometimes, customers just want you to listen to them, and they want to feel like the company cares about them and their opinions. \
36
+ For example, I once had a customer who got home to find there was only one shoe in their shoebox. They were quite upset, so I let them explain the issue and then I validated their feelings and provided them with a discount on the purchase along with the missing shoe. They left in a much better mood and became a loyal customer.""",
37
+ "Rating" : "Good",
38
+ #"qualitative_feedback": """The candidate's response is rated as 'Good.' The answer not only emphasizes the importance of active listening but also provides a specific and illustrative example to support the strategy. The candidate goes beyond general advice by recounting a real scenario where a customer faced an issue, demonstrating a practical application of the suggested approach. The mention of validating the customer's feelings and offering a discount, along with the missing shoe, shows a proactive and customer-focused problem-solving approach. This response indicates a strong understanding of customer service principles and an ability to apply them effectively in challenging situations, resulting in customer satisfaction and loyalty."""
39
+ },{
40
+ "position": f"""{position}""",
41
+ "question": """How can you improve a dissatisfied customer's experience?""",
42
+ "answer": """I've found the most successful strategy for turning an unhappy customer into a happy customer is by actively listening to what they're saying. Sometimes, customers just want you to listen to them, and they want to feel like the company cares about them and their opinions. """,
43
+ "Rating": "Average",
44
+ #"qualitative_feedback":"""The candidate's response is rated as 'Average.' While the answer acknowledges the importance of active listening, it lacks depth in providing a comprehensive strategy for improving a dissatisfied customer's experience. The candidate briefly mentions the significance of making the customer feel cared for, but there is a lack of specific actions or steps to address and resolve the customer's concerns. A stronger response could have included additional elements such as empathetic communication, prompt issue resolution, and, if applicable, offering appropriate compensation or solutions. The answer, though acknowledging a key aspect, falls short of providing a well-rounded and detailed approach to handling dissatisfied customers."""
45
+ },{
46
+ "position": f"""{position}""",
47
+ "question": """How can you improve a dissatisfied customer's experience?""",
48
+ "answer": " I was playing a game.",
49
+ "Rating" : "Poor",
50
+ #"qualitative_feedback": """The candidate's response is rated as 'Poor.' The answer provided does not address the question and appears to be irrelevant to the context of improving a dissatisfied customer's experience. It lacks any relevant information or insight into customer service strategies. A strong response should have focused on practical approaches, communication skills, and problem-solving methods to enhance the customer experience. The candidate's answer demonstrates a misunderstanding of the question and an inability to provide a relevant and thoughtful response."""
51
+ }
52
+ ]
53
+ elif position == "Nurse":
54
+ #set up examples
55
+ examples = [
56
+ {
57
+ "position": f"""{position}""",
58
+ "question": """ how do you handle the stress of the job ?""",
59
+ "answer": """I find the best way to handle the stress of the job is through meticulous organization and attention to detail. By making lists and prioritizing what needs to get done throughout my day I find that tasks which might seem overwhelming all at once are much more manageable. This also makes it possible for me to stay calm and remain focused on what needs to get done when unexpected situations arise.""",
60
+ "Rating" : "Good",
61
+ #"qualitative_feedback": """The candidate's response is rated as 'Good.' They provide a well-thought-out and practical approach to handling the stress of the nursing job. The emphasis on meticulous organization, attention to detail, and prioritization through making lists is a strong strategy for managing workload and preventing tasks from becoming overwhelming. The candidate's acknowledgment of the inevitability of unexpected situations and the ability to remain calm and focused in such scenarios demonstrates adaptability and resilience. Overall, the response showcases effective coping mechanisms that align with the demands of a nursing role, indicating a proactive and organized approach to stress management."""
62
+ },{
63
+ "position": f"""{position}""",
64
+ "question": """how do you handle the stress of the job ?""",
65
+ "answer": """I handle stress by focusing on the most important thing the care of the patient. I feel I owe it to my patients to stay calm and focused on them. """,
66
+ "Rating": "Average",
67
+ #"qualitative_feedback":"""The candidate's response is rated as 'Average.' While the answer acknowledges a strategy for handling stress by focusing on patient care, it lacks depth in providing additional coping mechanisms or self-care strategies. A more robust response could have included personal methods for maintaining work-life balance, seeking support from colleagues, or engaging in stress-relief activities outside of work. Additionally, the candidate could have elaborated on how maintaining focus on patient care contributes to their overall stress management. While the answer is acceptable, it falls slightly short of providing a more comprehensive understanding of the candidate's approach to handling stress in the nursing role."""
68
+ },{
69
+ "position": f"""{position}""",
70
+ "question": """ how do you handle the stress of the job ?""",
71
+ "answer": "I like a fast-paced pressure-filled environment that makes my job invigorating.",
72
+ "Rating" : "Poor",
73
+ #"qualitative_feedback": """The candidate's response is rated as 'Poor.' While expressing a preference for a fast-paced and pressure-filled environment can indicate adaptability, the answer lacks depth in addressing how the candidate actively manages and handles stress in the nursing job. A strong response would have included specific strategies or coping mechanisms, such as organization, prioritization, or self-care practices, to demonstrate a proactive approach to stress management. The current answer is vague and does not provide insight into the candidate's ability to handle the inherent stress of the nursing role, which is crucial for the position. A more detailed and focused response would have been more appropriate."""
74
+ }
75
+ ]
76
+ elif position == "Marketing Manager":
77
+ #set up examples
78
+ examples = [
79
+ {
80
+ "position": f"""{position}""",
81
+ "question": """Are you a team player? """,
82
+ "answer": """I am absolutely a team player. My perspective has always been that if my team succeeds, I succeed, and if I succeed, my team succeeds. I think work is a lot more fun when you're sharing your time and energy with people who want to raise each other up.""",
83
+ "Rating" : "Good",
84
+ #"qualitative_feedback": """The candidate's response is rated as 'Good.' They express a positive and collaborative attitude towards teamwork. The candidate emphasizes the mutual success of both individual and team, demonstrating an understanding of the interconnectedness of personal and team achievements. The mention of finding work more enjoyable when sharing time and energy with supportive team members adds a personal touch to the answer. Overall, the response conveys a strong commitment to teamwork and suggests that the candidate values a collaborative work environment, which is a positive trait for a Marketing Manager role."""
85
+ },{
86
+ "position": f"""{position}""",
87
+ "question": """Are rich snippets important for SEO ?""",
88
+ "answer": """"Having rich snippets can help search results stand out and increase the click-through rate. In the long run, it can positively affect page ranking, too.""",
89
+ "Rating": "Average",
90
+ #"qualitative_feedback":"""The candidate's response is rated as 'Average.' While the answer acknowledges the importance of rich snippets for SEO by mentioning that they can help search results stand out and increase click-through rates, it lacks depth in providing a more comprehensive explanation. A stronger response could have delved into the specific types of information that can be included in rich snippets, their impact on user engagement, and how they contribute to a better user experience. Additionally, the candidate could have elaborated on how search engines use rich snippets to understand the content better. The answer, though correct in recognizing the value of rich snippets, falls short of providing a more detailed and insightful response."""
91
+ },{
92
+ "position": f"""{position}""",
93
+ "question": """Can you discuss a time when a marketing campaign didn't perform as expected? How did you handle it, and what did you learn from the experience?""",
94
+ "answer": " I never had a campaign fail on me. All my campaigns were successful.",
95
+ "Rating" : "Poor",
96
+ #"qualitative_feedback": """The candidate's response is rated as 'Poor.' The answer lacks credibility and does not align with the reality of marketing, where not all campaigns are guaranteed to be successful. A more realistic and honest approach would have been to acknowledge that marketing campaigns can face challenges and share a specific instance where a campaign did not perform as expected. This would have provided an opportunity for the candidate to demonstrate problem-solving skills, adaptability, and the ability to learn from setbacks. The lack of humility and the claim that all campaigns were successful suggests a lack of transparency and self-awareness, which are crucial qualities for a Marketing Manager."""
97
+ }
98
+ ]
99
+ elif position == "Sales Manager":
100
+ #set up examples
101
+ examples = [
102
+ {
103
+ "position": f"""{position}""",
104
+ "question": """Why do you want the sales manager position?""",
105
+ "answer": """ I enjoyed what I read about this company and your products. I am ecstatic at the possibility of working for you. I love working with teams and helping to guide them to give it their all every day because that’s what I will do as the sales manager. I appreciate all the rave reviews about your products and want to help get your sales to the next level.\
106
+ In my previous job, I was promoted to start a new sales team and got to choose team members. I looked at everyone’s personalities, experiences, strengths and weaknesses to create a team that would balance each other. I know I can succeed as the sales manager for this company and want the opportunity to show you how I can help this company reach new heights.""",
107
+ "Rating" : "Good",
108
+ #"qualitative_feedback": """The candidate's response is rated as 'Good.' They provide a well-rounded answer that demonstrates genuine enthusiasm for the company and the sales manager position. The mention of enjoying what they read about the company and its products, along with expressing excitement at the possibility of working there, conveys a positive attitude. The candidate articulates a passion for working with teams and guiding them to excel, aligning with the responsibilities of a sales manager. Additionally, the mention of past success in starting a new sales team and strategically selecting team members showcases relevant experience and leadership skills. The candidate's commitment to contributing to the company's growth and taking it to the next level adds value to their response. Overall, the answer effectively communicates a strong interest in the position and the ability to make meaningful contributions to the sales team."""
109
+ },{
110
+ "position": f"""{position}""",
111
+ "question": """Why do you want the sales manager position?""",
112
+ "answer": """"I enjoyed what I read about this company and your products. I am ecstatic at the possibility of working for you. I love working with teams and helping to guide them to give it their all every day because that’s what I will do as the sales manager. I appreciate all the rave reviews about your products and want to help get your sales to the next level.\
113
+ """,
114
+ "Rating": "Average",
115
+ #"qualitative_feedback":"""The candidate's response is rated as 'Average.' While expressing excitement about the company and the products, the answer lacks specific details about the candidate's qualifications or experiences that make them suitable for the sales manager position. The mention of loving to work with teams and guide them is positive, but it could be enhanced by providing examples of past successes or leadership experiences in managing sales teams. Additionally, the candidate expresses a desire to help elevate sales but does not offer a clear strategy or insights into how they plan to achieve this goal. A stronger response would include more concrete details about the candidate's skills, experiences, and how they intend to contribute to the company's sales growth."""
116
+ },{
117
+ "position": f"""{position}""",
118
+ "question": """Why do you want the sales manager position?""",
119
+ "answer": " I enjoyed what I read about this company and your products.",
120
+ "Rating" : "Poor",
121
+ #"qualitative_feedback": """The candidate's response is rated as 'Poor.' The answer is overly brief and lacks substance. While expressing enjoyment about the company and its products is positive, it does not provide any meaningful insights into the candidate's qualifications, motivations, or specific reasons for wanting the sales manager position. A strong response would include details about the candidate's relevant skills, experiences, and how they plan to contribute to the success of the sales team. The current answer falls short of demonstrating a genuine interest in the role and does not convey a strong commitment to the position."""
122
+ }
123
+ ]
124
+ #position == "Medical Assistance"
125
+ else :
126
+ #set up examples
127
+ examples = [
128
+ {
129
+ "position": f"""{position}""",
130
+ "question": """Can you tell me about a time you overcame a difficult situation?""",
131
+ "answer": """ When I was working at the hospital, I communicated with an upset mother who insisted on being in the operating room with her son during his surgery. As this violated hospital rules, I knew I couldn't allow her in the room. Instead of becoming impatient with her, I tried to be empathetic about her situation. I understood she felt scared and didn't know about our safety procedures.\
132
+ I told her I understood her situation and knew she just wanted the best for her son. Next, I informed her politely of the hospital's policies and why they were in place, emphasizing that following them would help keep her son safe. I even promised to give her hourly updates, which comforted her and increased her trust in the medical team. She thanked me for speaking with her and providing great care for her son.""",
133
+ "Rating" : "Good",
134
+ #"qualitative_feedback": """The candidate's response is rated as 'Good.' They provide a detailed and well-structured example of overcoming a difficult situation in a medical setting. The candidate effectively demonstrates strong communication and empathy skills in dealing with an upset mother. They not only recognized and validated the mother's emotions but also explained the hospital's policies with empathy and understanding. The offer of hourly updates to comfort the mother and build trust in the medical team shows a proactive and patient-focused approach. Overall, the response showcases the candidate's ability to handle challenging situations with empathy, effective communication, and a commitment to patient care."""
135
+ },{
136
+ "position": f"""{position}""",
137
+ "question": """Can you tell me about a time you overcame a difficult situation?""",
138
+ "answer": """"When I was working at the hospital, I communicated with an upset mother who insisted on being in the operating room with her son during his surgery. As this violated hospital rules, I knew I couldn't allow her in the room. Instead of becoming impatient with her, I tried to be empathetic about her situation. I understood she felt scared and didn't know about our safety procedures.
139
+ """,
140
+ "Rating": "Average",
141
+ #"qualitative_feedback":"""The candidate's response is rated as 'Average.' While they provide a specific example of overcoming a difficult situation in a medical setting, the response lacks some depth. The candidate effectively communicates empathy and understanding towards the upset mother's situation, which is positive. However, the answer could be improved by providing more details about the resolution or outcome of the situation. Offering insights into how the candidate successfully navigated the violation of hospital rules, the mother's reaction to the explanation, or any additional steps taken would have added more substance to the response. Overall, while the answer is acceptable, there is room for enhancement in providing a more comprehensive account of the situation."""
142
+ },{
143
+ "position": f"""{position}""",
144
+ "question": """Can you tell me about a time you overcame a difficult situation?""",
145
+ "answer": " When I was working at the hospital, I communicated with an upset mother who insisted on being in the operating room with her son during his surgery.",
146
+ "Rating" : "Poor",
147
+ #"qualitative_feedback": """The candidate's response is rated as 'Poor.' While the candidate starts to describe a challenging situation involving an upset mother, the answer is incomplete and lacks necessary details. The response does not provide information on how the candidate handled the situation, what actions were taken, or the resolution of the problem. A strong answer to this question should include specific actions taken, the candidate's thought process, and the positive outcome or lessons learned from overcoming the difficult situation. In its current form, the response lacks the depth and completeness needed to showcase the candidate's problem-solving and interpersonal skills effectively."""
148
+ }
149
+ ]
150
+ #set up example_template
151
+ example_template = """
152
+ position: {position} .\
153
+ question: {question} \
154
+ answer: {answer}.\
155
+ Rating:{Rating}.\
156
+ """
157
+ #qualitative_feedback:{qualitative_feedback}.\
158
+
159
+ #set up example_prompt
160
+ example_prompt = PromptTemplate(
161
+ input_variables=["position", "question", "answer","Rating"],
162
+ template=example_template
163
+ )
164
+
165
+
166
+
167
+
168
+ # Set up prefix prompt
169
+ prefix = """
170
+ ### instruction: you are an experienced interviewer.\
171
+ You are interviewing a candidate for the position of {position} .\
172
+ You are tasked to rate an answer provided by the candidate. You should provide a categorical Rating and qualitative feedback.\
173
+ The categorical rating should be one of the following values: Good, average, or Poor.\
174
+ the qualitative feedback should provide sufficient details to justify the categorical rating.\
175
+ The position and the question asked to the candidate and the answer given by the candidate are given below.\
176
+ also some examples are given below.\
177
+ """
178
+ suffix = """
179
+ position : {position} .\
180
+ question : {question} \
181
+ answer : {answer}.\
182
+ qualitative_feedback:
183
+
184
+ """
185
+
186
+ few_shot_prompt_template = FewShotPromptTemplate(
187
+ examples=examples,
188
+ example_prompt=example_prompt,
189
+ prefix=prefix,
190
+ suffix=suffix,
191
+ input_variables=["position", "question", "answer"],
192
+ example_separator="\\\n\\\n" )
193
+
194
+
195
+ # send prompt to LLM using the common function
196
+ response = llm_inference(
197
+ model_type="huggingface",
198
+ input_variables_list=[ position, question, answer],
199
+ prompt_template=few_shot_prompt_template,
200
+ hf_repo_id="mistralai/Mistral-7B-Instruct-v0.1",
201
+ inference_type = "evaluation",
202
+ temperature=0.1,
203
+ max_length=32000,
204
+ )
205
+
206
+
207
+
208
+ return response
chatbot_functionalities/evaluate_answers.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import chromadb
4
+ from chatbot_functionalities.llms import llm_inference
5
+ from langchain.output_parsers import ResponseSchema
6
+ from langchain.output_parsers import StructuredOutputParser
7
+ from typing import List
8
+ from langchain.prompts import PromptTemplate
9
+ from langchain import FewShotPromptTemplate
10
+ from pathlib import Path
11
+
12
def evaluate_answer(
    question: str,
    answer: str,
    position: str,
    questions_collection: chromadb.Collection,
):
    """Rate a candidate's answer via few-shot LLM inference.

    Builds a few-shot prompt from curated question/answer examples (at most
    one example per rating category) and asks a HuggingFace-hosted model to
    rate the candidate's answer and justify the rating.

    Args:
        question: The interview question that was asked.
        answer: The answer given by the candidate.
        position: Job position the candidate is applying for.
        questions_collection: ChromaDB collection used to find curated
            questions semantically similar to `question`.

    Returns:
        A 2-tuple (rating, feedback). The first element is currently the
        literal string 'None' because the raw LLM response (second element)
        contains both the rating and the qualitative feedback.

    HuggingFace repo_id example:
        - mistralai/Mistral-7B-Instruct-v0.1
    """
    # Load the curated question/answer dataset that supplies few-shot examples.
    excel_file_path = (Path.cwd() / "data" / "processed" / "combined_dataset.xlsx").__str__()
    collected_q_a_df = pd.read_excel(excel_file_path, sheet_name='combined')
    collected_q_a_df.columns = [
        x.replace(" ", "_").lower().replace("/", "_or_") for x in collected_q_a_df.columns
    ]

    # Fetch the 3 curated questions most similar to the asked question for the
    # same position; their answers become the few-shot example pool.
    matching_questions = questions_collection.query(
        query_texts=[question],
        where={"position": {"$eq": position}},
        n_results=3,
    )
    matched_question_texts = matching_questions['documents'][0]

    # Collect at most one example per rating category.
    # BUG FIX: boolean masks replace DataFrame.query() f-strings — a position
    # or question containing a quote character previously broke (or could
    # inject into) the interpolated query expression.
    examples = []
    ratings_scope = ['Good', 'Average', 'Poor']
    for rating in ratings_scope:
        mask = (
            (collected_q_a_df['position_or_role'] == position)
            & (collected_q_a_df['question'].isin(matched_question_texts))
            & (collected_q_a_df['answer_quality'] == rating)
        )
        matching_rows = collected_q_a_df.loc[mask, ['question', 'answer']]
        if matching_rows.shape[0] > 0:
            examples.append(
                {
                    'position': position,
                    'question': question,
                    'answer': matching_rows.answer.iloc[0],
                    'Rating': rating,
                }
            )

    # Template used to render each few-shot example.
    example_template = """
    position: {position} .\
    question: {question} \
    answer: {answer}.\
    Rating:{Rating}.\
    """

    example_prompt = PromptTemplate(
        input_variables=["position", "question", "answer", "Rating"],
        template=example_template,
    )

    # Instruction preamble establishing the evaluator persona.
    prefix = """
    ### instruction: you are an experienced interviewer.\
    You are interviewing a candidate for the position of {position} .\
    You are tasked to rate an answer provided by the candidate. You should provide a categorical Rating and qualitative feedback.\
    The categorical rating should be one of the following values: Good, average, or Poor.\
    the qualitative feedback should provide sufficient details to justify the categorical rating.\
    The position and the question asked to the candidate and the answer given by the candidate are given below.\
    also some examples are given below.\
    """
    # Suffix carries the actual item to evaluate; the model continues after
    # "qualitative_feedback:".
    suffix = """
    position : {position} .\
    question : {question} \
    answer : {answer}.\
    qualitative_feedback:
    """

    few_shot_prompt_template = FewShotPromptTemplate(
        examples=examples,
        example_prompt=example_prompt,
        prefix=prefix,
        suffix=suffix,
        input_variables=["position", "question", "answer"],
        example_separator="\\\n\\\n",
    )

    # Delegate the actual call to the shared inference helper.
    response = llm_inference(
        model_type="huggingface",
        input_variables_list=[position, question, answer],
        prompt_template=few_shot_prompt_template,
        hf_repo_id="mistralai/Mistral-7B-Instruct-v0.1",
        inference_type="evaluation",
        temperature=0.1,
        max_length=32000,
    )

    # Rating is not parsed out of the response yet; callers store the raw
    # response text as feedback.
    return 'None', response
127
+
128
def evaluate_answer_obsolete(
    question: str,
    answer: str,
    position: str,
):
    """Rate a candidate's answer with a single zero-shot prompt (superseded).

    Older evaluation path kept for reference: it sends one structured prompt
    to a HuggingFace-hosted model and parses the response into a categorical
    rating plus qualitative feedback using langchain's structured output parser.

    Args:
        question: The interview question that was asked.
        answer: The answer given by the candidate.
        position: Job position the candidate is applying for.

    Returns:
        A 2-tuple (Rating, qualitative_feedback) parsed from the LLM output.

    HuggingFace repo_id example:
        - mistralai/Mistral-7B-Instruct-v0.1
    """
    # Schema describing the categorical rating field the LLM must return.
    rating_schema = ResponseSchema(name="Rating",
                                   description="it was the categorical value for the answer given by the candidate and this value could be poor, average or good. \
                                   ,the categorical value given by you as an experienced interviewer. \
                                   after asking a candidate a question related to the position he is applying for")

    # Schema describing the free-text justification field.
    feedback_schema = ResponseSchema(name="qualitative_feedback",
                                     description="the qualitative feedback is the sufficient details which is given by you as an Experienced interviewer. \
                                     the qualitative feedback is given after asking the candidate a question related to the position he is applying for, \
                                     and the candidate provided his answer. \
                                     the qualitative feedback should provide sufficient details to justify the categorical rating ")

    # Parser built from both schemas; it also supplies the format instructions
    # that are embedded into the prompt below.
    parser = StructuredOutputParser.from_response_schemas([rating_schema, feedback_schema])
    format_instructions = parser.get_format_instructions()

    # Zero-shot evaluation prompt.
    evaluation_prompt = (
        """### instruction: you are an experienced interviewer.\
        You are interviewing a candidate for the position of {position} .\
        You are tasked to rate an answer provided by the candidate. You should provide a categorical rating and qualitative_feedback.\
        The categorical rating should be one of the following values: Good, average, or Poor.\
        the qualitative_feedback should provide sufficient details to justify the categorical rating.\
        the format instructions of the output and the question asked to the candidate and the answer given by the candidate are given below.\
        ### format instruction: {format_instructions}.\
        ### question:{question}.\
        ### answer:{answer}.\
        ### Rating:
        """
    )

    # Run the evaluation through the shared HuggingFace inference helper.
    response = llm_inference(
        model_type="huggingface",
        input_variables_list=[position, format_instructions, question, answer],
        prompt_template=evaluation_prompt,
        hf_repo_id="mistralai/Mistral-7B-Instruct-v0.1",
        inference_type="evaluation",
        temperature=0.1,
        max_length=2024,
    )

    # Parse the raw response into the two declared fields.
    parsed_output = parser.parse(response)
    return parsed_output["Rating"], parsed_output["qualitative_feedback"]
202
+
203
def evaluate_all_answers(
    interview_history: pd.DataFrame,
    questions_collection: chromadb.Collection,
):
    """Evaluate every question/answer pair recorded during the mock interview.

    Mutates `interview_history` in place, filling the 'ratings' and 'feedback'
    columns one row at a time. Expected columns:
    ["question", "interview_phase", "position", "answer", "ratings", "feedback"].

    Args:
        interview_history: All questions asked in the mock interview together
            with the candidate's answers.
        questions_collection: ChromaDB collection passed through to the
            per-answer evaluator.
    """
    for idx in interview_history.index:
        record = interview_history.loc[idx]

        # Rate one question/answer pair with the LLM-backed evaluator.
        rating, feedback = evaluate_answer(
            question=record.question,
            answer=record.answer,
            position=record.position,
            questions_collection=questions_collection,
        )

        # Write the results back into the same row of the history frame.
        interview_history.loc[idx, ['ratings', 'feedback']] = [rating, feedback]
226
+
227
def get_ratings_for_answers(df: pd.DataFrame):
    """Fill the 'ratings' column with placeholder uniform random values in [0, 1)."""
    rng = np.random.default_rng()
    random_ratings = rng.uniform(low=0, high=1, size=[df.shape[0], 1])
    df.loc[:, 'ratings'] = random_ratings
230
+
231
def get_feedback_for_answers(df: pd.DataFrame):
    """Fill the 'feedback' column with a fixed placeholder string."""
    placeholder_text = 'Some Random Feedback'
    df.loc[:, 'feedback'] = placeholder_text
233
+
234
def get_overall_feedback():
    """Return placeholder overall-interview feedback text."""
    overall_feedback = 'Some Overall Feedback'
    return overall_feedback
chatbot_functionalities/generate_questions.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import chromadb
3
+ import re
4
+ from chatbot_functionalities.llms import llm_inference
5
+
6
+
7
def _parse_llm_question_list(raw_llm_output: str) -> list:
    """Turn raw LLM output into a clean list of question strings.

    Drops empty lines and strips any leading "<number>. " enumeration prefix
    that instruction-tuned models commonly prepend to each question.
    """
    non_empty_lines = [x for x in raw_llm_output.split("\n") if x != ""]
    enumeration_prefix = re.compile(r"^\d+\.\s")
    return [re.sub(enumeration_prefix, "", x) for x in non_empty_lines]


def generate_questions(
    position: str, candidate_profile: str, question_collection: chromadb.Collection
) -> pd.DataFrame:
    """This function will generate a set of relevant questions, given the candidate's position of choosing and their profile.

    Under the hood, it uses semantic search to extract the relevant questions from a vector database containing the
    embeddings of the question bank gathered as part of the project.

    If a semantic search match is not found based on the position or candidate profile, then an LLM will be used
    to generate a question for that particular interview phase.

    Args:
        position (str): Position of the candidate for which the interview is taking place.
        candidate_profile (str): Description of the profile of the candidate.
        question_collection (chromadb.Collection): Vector DB collection holding the curated question bank.

    Returns:
        pd.DataFrame: Pandas dataframe containing a list of all relevant questions generated, along with the interview phase and position.
    """
    # Result frame; 'answer', 'ratings' and 'feedback' are filled later in the app flow.
    question_df = pd.DataFrame(
        columns=["question", "interview_phase", "position", "answer", "ratings", "feedback"]
    )

    # Parallel lists that become the 'question' and 'interview_phase' columns;
    # they must stay the same length at all times.
    questions_list = []
    interview_phase_list = []

    # ------------------------------- #
    # -------INTRODUCTION PHASE------ #
    # ------------------------------- #

    print("Generating questions for introduction phase...\n")
    # Fetch introduction questions using semantic search.
    intro_ques_semantic_search = question_collection.query(
        query_texts=[candidate_profile],
        where={
            "$and": [
                {"position": {"$eq": position}},
                {"interview_phase": {"$eq": "Introduction"}},
            ]
        },
        n_results=2,
    )
    intro_semantic_docs = intro_ques_semantic_search["documents"][0]

    # If semantic search returned fewer than 2 introduction questions, ask the
    # LLM to fill the gap.
    if len(intro_semantic_docs) != 2:
        num_ques_to_gen = 2 - len(intro_semantic_docs)
        intro_template = """Assume you are an expert interviewer, interviewing a candidate. You have the following information:
        Position applying for : {position}
        Candidate profile summary : {candidate_profile}.
        Using the above information, generate {num_ques_to_gen} introductory question/questions which can help start off the interview. Please provide questions that are highly relevant for the job position only. Don't ask irrelevant questions."""

        intro_ques_llm = llm_inference(
            model_type="huggingface",
            input_variables_list=[position, candidate_profile, num_ques_to_gen],
            prompt_template=intro_template,
            hf_repo_id="tiiuae/falcon-7b-instruct",
            temperature=0.1,
            max_length=64,
        )
        intro_ques_llm_list = _parse_llm_question_list(intro_ques_llm)

        questions_list.extend(intro_ques_llm_list)
        questions_list.extend(intro_semantic_docs)
        # BUG FIX: label exactly as many phases as questions were actually
        # added. The LLM may return more or fewer lines than requested; the
        # original hard-coded 2 here, which could desynchronise the two
        # parallel lists and break the DataFrame assignment at the end.
        interview_phase_list.extend(
            ["Introduction"] * (len(intro_ques_llm_list) + len(intro_semantic_docs))
        )
    else:
        questions_list.extend(intro_semantic_docs)
        interview_phase_list.extend(["Introduction"] * len(intro_semantic_docs))

    print("Introduction phase question generation complete...\n")

    # ------------------------------- #
    # -----------CORE PHASE---------- #
    # ------------------------------- #

    print("Generating questions for core phase...\n")

    # Fetch core questions using semantic search (any phase that is neither
    # Introduction nor Conclusion).
    core_ques_semantic_search = question_collection.query(
        query_texts=[candidate_profile],
        where={
            "$and": [
                {"position": {"$eq": position}},
                {"interview_phase": {"$nin": ["Introduction", "Conclusion"]}},
            ]
        },
        n_results=4,
    )
    core_semantic_docs = core_ques_semantic_search["documents"][0]
    # Semantic hits keep their own phase label from the collection metadata.
    core_semantic_phases = [
        d["interview_phase"] for d in core_ques_semantic_search["metadatas"][0]
    ]

    # If semantic search returned fewer than 4 core questions, fall back to the LLM.
    if len(core_semantic_docs) != 4:
        num_ques_to_gen = 4 - len(core_semantic_docs)
        core_template = """Assume you are an expert interviewer, interviewing a candidate. You have the following information:
        Position applying for : {position}
        Candidate profile summary : {candidate_profile}.
        Using the above information, generate {num_ques_to_gen} position specific question/questions which can help start off the interview. Please provide questions that are highly relevant for the job position only. Don't ask irrelevant questions."""

        core_ques_llm = llm_inference(
            model_type="huggingface",
            input_variables_list=[position, candidate_profile, num_ques_to_gen],
            prompt_template=core_template,
            hf_repo_id="tiiuae/falcon-7b-instruct",
            temperature=0.1,
            max_length=64,
        )
        core_ques_llm_list = _parse_llm_question_list(core_ques_llm)

        questions_list.extend(core_ques_llm_list)
        # BUG FIX (as above): use the actual number of LLM-generated questions,
        # not the requested count, to keep the parallel lists aligned.
        interview_phase_list.extend(["Core"] * len(core_ques_llm_list))
        questions_list.extend(core_semantic_docs)
        interview_phase_list.extend(core_semantic_phases)
    else:
        questions_list.extend(core_semantic_docs)
        interview_phase_list.extend(core_semantic_phases)

    print("Core phase question generation complete...\n")

    # Add lists as columns to the DataFrame.
    question_df["question"] = questions_list
    question_df["interview_phase"] = interview_phase_list
    question_df["position"] = [position] * len(questions_list)

    return question_df
chatbot_functionalities/llms.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv, find_dotenv
2
+ from langchain.llms import HuggingFaceHub, OpenAI
3
+ from langchain.chains import LLMChain
4
+ from langchain.prompts import PromptTemplate
5
+ import warnings
6
+ from typing import List
7
+ from langchain import FewShotPromptTemplate
8
+
9
+
10
+ warnings.filterwarnings("ignore")
11
+
12
+
13
def _dispatch_chain(llm_chain, inference_type: str, input_variables_list: List[str]) -> str:
    """Run the chain with the keyword arguments expected for the task.

    'evaluation' prompts expect (position, question, answer); any other
    inference type means question generation and expects
    (position, candidate_profile, num_ques_to_gen).
    """
    if inference_type == "evaluation":
        return llm_chain.run(
            position=input_variables_list[0],
            question=input_variables_list[1],
            answer=input_variables_list[2],
        )
    return llm_chain.predict(
        position=input_variables_list[0],
        candidate_profile=input_variables_list[1],
        num_ques_to_gen=input_variables_list[2],
    )


def llm_inference(
    model_type: str,
    input_variables_list: List[str] = [],  # NOTE: never mutated here, so the shared default is safe; prefer None in new code
    prompt_template: str = "",
    openai_model_name: str = "",
    hf_repo_id: str = "",
    inference_type: str = "",
    temperature: float = 0.1,
    max_length: int = 64,
) -> str:
    """Call HuggingFace/OpenAI model for inference.

    Given a prompt template and its input values, this function calls the
    relevant API to fetch LLM inference results.

    Args:
        model_type: Denotes the LLM vendor's name. Can be either 'huggingface' or 'openai'.
        input_variables_list: Values for the prompt's input variables, in
            positional order (see _dispatch_chain for the expected order).
        prompt_template: A template string for the prompt, or — when
            inference_type == 'evaluation' — an already-built prompt object
            (e.g. a FewShotPromptTemplate).
        openai_model_name: OpenAI model name (used only when model_type == 'openai').
        hf_repo_id: The HuggingFace model's repo_id (used only when model_type == 'huggingface').
        inference_type: 'evaluation' to rate an answer provided by the
            candidate; anything else means question generation.
        temperature: The temperature of the sampling operation. 0 means always
            take the highest score; larger values approach uniform sampling.
        max_length: Maximum length in tokens of the generated output.

    Returns:
        A Python string which contains the inference result; an empty string
        when model_type is invalid.

    HuggingFace repo_id examples:
        - mistralai/Mistral-7B-Instruct-v0.1
        - google/flan-t5-xxl
        - tiiuae/falcon-7b-instruct
    """
    # Please ensure you have a .env file available with 'HUGGINGFACEHUB_API_TOKEN'
    # and 'OPENAI_API_KEY' values.
    load_dotenv(find_dotenv())

    if inference_type == "evaluation":
        # Evaluation callers pass a fully-built prompt object already.
        prompt = prompt_template
    else:
        prompt = PromptTemplate(
            template=prompt_template, input_variables=input_variables_list
        )

    # Build the vendor-specific LLM; the chain dispatch afterwards is shared
    # (the original duplicated the run/predict logic in both branches).
    if model_type == "openai":
        # https://api.python.langchain.com/en/stable/llms/langchain.llms.openai.OpenAI.html#langchain.llms.openai.OpenAI
        llm = OpenAI(
            model_name=openai_model_name, temperature=temperature, max_tokens=max_length
        )
    elif model_type == "huggingface":
        # https://python.langchain.com/docs/integrations/llms/huggingface_hub
        llm = HuggingFaceHub(
            repo_id=hf_repo_id,
            model_kwargs={"temperature": temperature, "max_length": max_length},
        )
    else:
        print(
            "Please use the correct value of model_type parameter: It can have a value of either openai or huggingface"
        )
        return ""

    llm_chain = LLMChain(prompt=prompt, llm=llm)
    return _dispatch_chain(llm_chain, inference_type, input_variables_list)
chatbot_functionalities/vectordb_operations.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv, find_dotenv
2
+ import pandas as pd
3
+ import os
4
+ import chromadb
5
+ from chromadb.utils import embedding_functions
6
+
7
+
8
def _snake_case_columns(columns) -> list:
    """Normalize column labels: spaces -> '_', lowercase, '/' -> '_or_'.

    Matches the attribute naming needed for itertuples() access elsewhere in
    the project (e.g. 'Position/Role' -> 'position_or_role').
    """
    return [x.replace(" ", "_").lower().replace("/", "_or_") for x in columns]


def generate_qa_vector_db(vdb_path: str, df: pd.DataFrame) -> None:
    """This function processes the dataframe into the required format, and then creates the following collections in a ChromaDB instance
    1. question_collection - Contains question embeddings, and the metadata as 'position' and 'interview_phase'
    2. answer_collection - Contains the answer embeddings. No metadata (yet).

    Args:
        vdb_path (str): Relative path of the location of the ChromaDB instance.
        df (pd.DataFrame): Question/answer dataset.
    """
    chroma_client = chromadb.PersistentClient(path=vdb_path)

    # Remote embedding function; requires HUGGINGFACEHUB_API_TOKEN in the environment.
    huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
        api_key=os.environ["HUGGINGFACEHUB_API_TOKEN"],
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )

    print("q_collection will be added")
    q_collection = chroma_client.create_collection(
        name="question_collection",
        metadata={"hnsw:space": "cosine"},  # cosine distance for semantic search
        embedding_function=huggingface_ef,
    )

    # Keep only question-related columns and drop duplicate questions.
    df_questions = df[
        ["Position/Role", "Question", "Interview Phase"]
    ].drop_duplicates()
    df_questions.columns = _snake_case_columns(df_questions.columns)

    q_documents = [row.question for row in df_questions.itertuples()]
    q_metadata = [
        {"position": row.position_or_role, "interview_phase": row.interview_phase}
        for row in df_questions.itertuples()
    ]
    # Ids reuse the original dataframe index, so documents/metadata/ids stay aligned.
    q_ids = ["q_id" + str(row.Index) for row in df_questions.itertuples()]

    q_collection.add(documents=q_documents, metadatas=q_metadata, ids=q_ids)
    print("q_collection added")

    print("a_collection will be added")
    a_collection = chroma_client.create_collection(
        name="answer_collection",
        metadata={"hnsw:space": "cosine"},
        embedding_function=huggingface_ef,
    )

    df_answers = df[["Answer", "Answer Quality"]]
    df_answers.columns = _snake_case_columns(df_answers.columns)

    a_documents = [row.answer for row in df_answers.itertuples()]
    a_metadata = [
        {"answer_quality": row.answer_quality} for row in df_answers.itertuples()
    ]
    a_ids = ["a_id" + str(row.Index) for row in df_answers.itertuples()]

    a_collection.add(documents=a_documents, ids=a_ids, metadatas=a_metadata)
    print("a_collection added")
    return None
72
+
73
+
74
def delete_collection_from_vector_db(vdb_path: str, collection_name: str) -> None:
    """Deletes a particular collection from the persistent ChromaDB instance.

    Args:
        vdb_path (str): Path of the persistent ChromaDB instance.
        collection_name (str): Name of the collection to be deleted.
    """
    client = chromadb.PersistentClient(path=vdb_path)
    client.delete_collection(collection_name)
    return None
84
+
85
+
86
def list_collections_from_vector_db(vdb_path: str) -> None:
    """Lists all the available collections from the persistent ChromaDB instance.

    Args:
        vdb_path (str): Path of the persistent ChromaDB instance.
    """
    client = chromadb.PersistentClient(path=vdb_path)
    available_collections = client.list_collections()
    print(available_collections)
94
+
95
+
96
def get_collection_from_vector_db(
    vdb_path: str, collection_name: str
) -> chromadb.Collection:
    """Fetches a particular ChromaDB collection object from the persistent ChromaDB instance.

    The collection is returned with the HuggingFace embedding function
    attached, so subsequent query() calls embed their query texts remotely.

    Args:
        vdb_path (str): Path of the persistent ChromaDB instance.
        collection_name (str): Name of the collection which needs to be retrieved.
    """
    # Load HUGGINGFACEHUB_API_TOKEN (and friends) from the project's .env file.
    load_dotenv(find_dotenv())

    client = chromadb.PersistentClient(path=vdb_path)
    embedding_fn = embedding_functions.HuggingFaceEmbeddingFunction(
        api_key=os.environ["HUGGINGFACEHUB_API_TOKEN"],
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )

    return client.get_collection(
        name=collection_name, embedding_function=embedding_fn
    )
data/chromadb/a48dcc07-8cd8-4e70-9baa-d501529cde2e/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a83ca203e832eb9230af291a3b445222fbdca949ab8645c0a87092a157169b6
3
+ size 1676000
data/chromadb/a48dcc07-8cd8-4e70-9baa-d501529cde2e/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff457caa817cdece6129e3f341d4bfdaf24563052863fb73d09d5c296604567b
3
+ size 100
data/chromadb/a48dcc07-8cd8-4e70-9baa-d501529cde2e/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7c830675a783d4b7aa489505e90e7b18e2c42f9181d88e6201d794adf313c99
3
+ size 27113
data/chromadb/a48dcc07-8cd8-4e70-9baa-d501529cde2e/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:194c16e8196c9846ec696980cd3b0fb73397df5a6354753da3555ef5736dcf0e
3
+ size 4000
data/chromadb/a48dcc07-8cd8-4e70-9baa-d501529cde2e/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56e93be3c1b7037804da4e20b50a159f99e9a93e0728dfa16360e93f9df614d7
3
+ size 8148
data/chromadb/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc6e9e6a51cad1d50a8c57546b811a05524609b61353d234be72aa5c6b084da5
3
+ size 13639680
data/chromadb/d75413bb-8f5a-4c3d-b905-2a017b87ba02/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d407af3134c0c7e7db8a8e5f6200033dfd450d4355069df65bd17ca04a3c95cb
3
+ size 3352000
data/chromadb/d75413bb-8f5a-4c3d-b905-2a017b87ba02/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:946da38a184ff153c8553398e912e9d8dada41b5907101c44c74098aa74e1eb0
3
+ size 100
data/chromadb/d75413bb-8f5a-4c3d-b905-2a017b87ba02/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31f6b0010521857c7b326c538e807f5b4a0d18ada6b7e9c6dc1ede31aa7f046e
3
+ size 57125
data/chromadb/d75413bb-8f5a-4c3d-b905-2a017b87ba02/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d59e1d669af55c9a54f56a9a8ea3a0e30b699640c1e86a9741278f66d5528677
3
+ size 8000
data/chromadb/d75413bb-8f5a-4c3d-b905-2a017b87ba02/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9cbb168a2ec48e3cf003796f17fd8149a05534e9f6c532a25e7ee9a5c23eec6
3
+ size 17316
data/originals/Customer Service Representative.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/originals/Final Dataset Team 3.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/originals/Team 2 Final Dataset.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/combined_dataset.xlsx ADDED
Binary file (358 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SpeechRecognition==3.10.1
2
+ streamlit-mic-recorder==0.0.4
3
+
4
+ pandas==2.1.4
5
+
6
+ python-dotenv==1.0.0
7
+
8
+ nltk==3.8.1
9
+ huggingface-hub==0.19.4
10
+ sentence-transformers==2.2.2
11
+
12
+ pysqlite3-binary
13
+ chromadb==0.4.20
14
+
15
+ tiktoken==0.5.2
16
+ openai==1.5.0
17
+
18
+ langchain==0.0.352