pokameswaran commited on
Commit
5efc535
·
1 Parent(s): 98d6da2

Added files related to the app

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/chromadb/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
chatbot.py ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dependencies
2
+ import logging
3
+ import streamlit as st
4
+ from streamlit_mic_recorder import speech_to_text
5
+ from pathlib import Path
6
+ from chatbot_functionalities.generate_questions import generate_questions
7
+ from chatbot_functionalities.vectordb_operations import get_collection_from_vector_db
8
+ from chatbot_functionalities.evaluate_answers import evaluate_all_answers, get_overall_feedback
9
+
# enable logging
# basicConfig applies the INFO threshold to the root logger for the whole app
logging.basicConfig(level=logging.INFO)
# module-level logger; all app log records are tagged "simple-chatbot"
logger = logging.getLogger("simple-chatbot")
13
+
14
+
15
# function to initialize web app for the first time
def initialize_app():
    """Performs processing that should happen upon loading of the web app and
    sets all session state variables to their desired initial state.

    Side effects:
        Writes all `p01_*` flags and interview-tracking variables into
        `st.session_state` and loads the question collection from ChromaDB.
    """
    # set status flags to their desired initial state
    st.session_state.p01_show_mock_interview = False
    st.session_state.p01_profile_details_taken = False
    st.session_state.p01_questions_generated = False
    st.session_state.p01_record_answer_disabled = False
    st.session_state.p01_start_mock_interview_disabled = False

    # initialize variables related to question and interview history
    st.session_state.p01_current_question = None
    # -1 marks "profile question" stage; real questions are indexed from 0
    st.session_state.p01_current_question_index = -1
    st.session_state.p01_questions_count = 0
    st.session_state.p01_interview_history = []

    # first question that will be asked to every candidate
    # this can be replaced with CV summarization component
    st.session_state.p01_candidate_profile_question = (
        "Please provide a brief summary about your education background and prior work experience "
        "that may be relevant to the chosen job position."
    )

    # instruction that will be printed before the microphone button
    # FIX: a space was missing between the second and third sentences,
    # rendering as "...configured correctly.Press the 'Record Answer'..."
    st.session_state.p01_recording_instructions = (
        "All responses will be captured through the microphone available on your device. "
        "Ensure that the microphone is working and configured correctly. "
        "Press the 'Record Answer' button and start speaking on the microphone after 1 second."
    )

    # fetch the necessary collections from vector db
    # str(...) is the idiomatic spelling of the previous Path.__str__() call
    st.session_state.p01_questions_collection = get_collection_from_vector_db(
        vdb_path=str(Path.cwd() / "data" / "chromadb"),
        collection_name="question_collection",
    )

    # set the flag that indicates initialization is done
    # this flag is crucial and should be set as the very last step in this function as
    # the web app invokes this function only when this variable is not set
    st.session_state.p01_init_complete = True
57
+
58
+
59
def load_interview_questions():
    """Helper function to call the question generation module.

    Generates the question set from the candidate's spoken profile summary
    (the second entry in the interview history), records the question count,
    and flips the generation flags. A no-op when questions already exist for
    this session.
    """
    # nothing to do if this session already has its questions
    if st.session_state.p01_questions_generated:
        return

    # use candidate provided profile summary and generate subsequent questions to be asked
    questions_df = generate_questions(
        position=st.session_state.p01_job_position,
        candidate_profile=st.session_state.p01_interview_history[1]["content"],
        question_collection=st.session_state.p01_questions_collection,
    )
    st.session_state.p01_questions_df = questions_df

    # set questions count (number of rows in the generated dataframe)
    st.session_state.p01_questions_count = len(questions_df)

    # set flags: questions are ready, interview has not yet concluded
    st.session_state.p01_questions_generated = True
    st.session_state.p01_mock_interview_concluded = False
77
+
78
+
79
# function(s) to process user interactions
def start_mock_interview():
    """Resets mock interview section of the app and adds the question to
    collect candidate profile details.

    Side effects:
        Clears the interview history and feedback, re-enables answer
        recording, disables the start button while the interview runs,
        and sets the current question to the profile-summary prompt.
    """
    st.session_state.p01_show_mock_interview = True
    st.session_state.p01_questions_generated = False
    st.session_state.p01_interview_history = []
    st.session_state.p01_record_answer_disabled = False
    st.session_state.p01_start_mock_interview_disabled = True
    st.session_state.overall_feedback = None

    # set current question to candidate profile request question
    # (the previous `[:]` slice copy was a no-op: strings are immutable)
    st.session_state.p01_current_question = (
        st.session_state.p01_candidate_profile_question
    )
96
+
97
def speech_recognition_callback():
    """Handles a completed speech-to-text capture from the microphone widget.

    Validates the transcription, appends the question/answer pair to the
    interview history, triggers question generation after the first (profile)
    answer, stores the answer against the current question, then advances to
    the next question or concludes the interview when none remain.
    """
    # transcription failed or nothing was captured — ask the candidate to retry
    # FIX: corrected user-facing typo "reponse" -> "response"
    if st.session_state.my_stt_output is None:
        st.session_state.p01_error_message = "Please record your response again."
        return

    st.session_state.p01_error_message = None

    st.session_state.p01_last_candidate_response = st.session_state.my_stt_output

    # if code reaches this point, then a response was successfully captured and transcribed
    # append current question and the utterance from the candidate to interview history
    st.session_state.p01_interview_history.append(
        dict(role="assistant", content=st.session_state.p01_current_question)
    )
    st.session_state.p01_interview_history.append(
        dict(role="user", content=st.session_state.my_stt_output)
    )

    # generate questions if not already done
    # this is done here instead of the 'Start Mock Interview' button because the
    # CV summarization component is not ready and we need to ask the candidate
    # to give a profile summary as part of the first question
    if not st.session_state.p01_questions_generated:
        with st.spinner("Preparing questions for your mock interview"):
            load_interview_questions()

    # add answer to the questions dataframe
    # index -1 is the profile-summary stage, which is intentionally not stored
    if st.session_state.p01_current_question_index > -1:
        st.session_state.p01_questions_df.loc[
            st.session_state.p01_current_question_index, 'answer'
        ] = st.session_state.my_stt_output

    # change current question to the next available question
    # check if there are any more question(s) to be asked
    if (
        st.session_state.p01_current_question_index
        < st.session_state.p01_questions_count - 1
    ):
        st.session_state.p01_current_question_index += 1
        st.session_state.p01_current_question = (
            st.session_state.p01_questions_df.iloc[
                st.session_state.p01_current_question_index
            ].question
        )
    # no more questions to be asked
    else:
        st.session_state.p01_current_question = "Your mock interview is over"
        st.session_state.p01_record_answer_disabled = True
        st.session_state.p01_start_mock_interview_disabled = False
        st.session_state.p01_mock_interview_concluded = True

    # since the state update is async, the displayed question would not refresh
    # on its own; force a rerun so the UI shows the new question
    st.experimental_rerun()
150
+
151
def get_feedback():
    """Evaluates all recorded answers and stores the overall feedback.

    Runs the answer-evaluation pipeline over the questions dataframe, then
    saves the aggregate feedback into `st.session_state.overall_feedback`
    so the Results tab can render it.
    """
    questions_df = st.session_state.p01_questions_df
    evaluate_all_answers(
        interview_history=questions_df,
        questions_collection=st.session_state.p01_questions_collection,
    )
    st.session_state.overall_feedback = get_overall_feedback()
159
+
160
# function for rendering the main web application
def run_web_app():
    """Renders the web application, captures user actions and
    invokes appropriate event specific callbacks.

    Layout: a sidebar for candidate profile inputs plus three tabs —
    Q&A (live interview), History (transcript), Results (feedback).
    """

    # page or window title - this shows up as browser window title
    st.set_page_config(page_title="Interview Preparation Assistant")

    # call initialization function (only for the first time)
    if "p01_init_complete" not in st.session_state:
        initialize_app()

    # setup sidebar
    # sidebar title
    st.sidebar.markdown(
        "<h4 style='color: orange;'>Candidate Profile</h4>",
        unsafe_allow_html=True,
    )

    # user input field to capture name of the candidate
    candidate_name = st.sidebar.text_input(
        label="Candidate Name",
        placeholder="Enter your name",
        key="p01_candidate_name",
    )

    # list of allowed values for job position
    # FIX: "Marketing Manager" previously carried a trailing space, so it could
    # never satisfy the `position == "Marketing Manager"` check in the
    # answer-evaluation module and silently fell into the default branch
    job_position_options = [
        "Customer Service Representative",
        "Sales Manager",
        "Marketing Manager",
        "Nurse",
        "Medical Assistance",
    ]
    # user input field to capture job position for which candidate wants to prepare
    job_position = st.sidebar.selectbox(
        label="Job Position",
        placeholder="Select a job position",
        options=job_position_options,
        key="p01_job_position",
    )

    # button to start mock interview (disabled while an interview is running)
    st.sidebar.button(
        label="Start Mock Interview",
        on_click=start_mock_interview,
        disabled=st.session_state.p01_start_mock_interview_disabled,
        key="p01_start_mock_interview",
    )

    # setup tabs
    tab1, tab2, tab3 = st.tabs(["Q&A", "History", "Results"])

    # render mock interview section in tab 1
    if st.session_state.p01_show_mock_interview:
        with tab1:
            # set page heading (this is a title for the main section of the app)
            p01_interview_section_title = (
                f"Mock Interview for {st.session_state.p01_job_position}"
            )
            with st.container():
                st.markdown(
                    f"<h4 style='color: orange;'>{p01_interview_section_title}</h4>",
                    unsafe_allow_html=True,
                )

            # current question section
            with st.container():
                p01_current_question_title = "Current Question"
                with st.container():
                    st.markdown(
                        f"<h6 style='color: orange;'>{p01_current_question_title}</h6>",
                        unsafe_allow_html=True,
                    )
                with st.chat_message("assistant"):
                    st.markdown(st.session_state.p01_current_question)

            # button to start recording (shown only while an interview is running,
            # i.e. while the sidebar start button is disabled)
            if st.session_state.get("p01_start_mock_interview_disabled") is True:
                with st.spinner():
                    speech_to_text(
                        key='my_stt',
                        callback=speech_recognition_callback,
                    )

            # error message section
            if st.session_state.get("p01_error_message") is not None:
                with st.container():
                    st.error(st.session_state.p01_error_message)

    # render interview history in tab 2
    with tab2:
        # loop through interview history and show the messages if they exist
        p01_interview_history_title = "Interview History"
        with st.container():
            st.markdown(
                f"<h4 style='color: orange;'>{p01_interview_history_title}</h4>",
                unsafe_allow_html=True,
            )
        # newest exchange first
        for message in st.session_state.p01_interview_history[::-1]:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])

    # render evaluation results and feedback in tab 3
    with tab3:
        # loop through evaluation results and show the results if they exist
        p01_interview_evaluation_title = "Evaluation Results & Feedback"
        with st.container():
            st.markdown(
                f"<h4 style='color: orange;'>{p01_interview_evaluation_title}</h4>",
                unsafe_allow_html=True,
            )

        # feedback can only be requested once the interview has concluded
        if st.session_state.get("p01_mock_interview_concluded") is True:
            st.button(
                label="Get Feedback",
                type="primary",
                on_click=get_feedback,
                key="p01_get_feedback",
            )

        if st.session_state.get("overall_feedback") is not None:
            if "p01_questions_df" in st.session_state:
                st.markdown(
                    "<h6 style='color: orange;'>Question Level Feedback</h6>",
                    unsafe_allow_html=True,
                )
                # three-column header row for the per-question feedback table
                with st.container():
                    col1, col2, col3 = st.columns(3)
                    with col1:
                        st.markdown(
                            "<h6 style='color: red;'>Question</h6>",
                            unsafe_allow_html=True,
                        )
                    with col2:
                        st.markdown(
                            "<h6 style='color: red;'>Answer</h6>",
                            unsafe_allow_html=True,
                        )
                    with col3:
                        st.markdown(
                            "<h6 style='color: red;'>Rating & Feedback</h6>",
                            unsafe_allow_html=True,
                        )

                # one row per evaluated question
                for row in st.session_state.p01_questions_df.itertuples():
                    with st.container():
                        col1, col2, col3 = st.columns(3)
                        with col1:
                            st.markdown(row.question)
                        with col2:
                            st.markdown(row.answer)
                        with col3:
                            st.markdown(row.feedback)

            with st.container():
                st.markdown(
                    "<h6 style='color: orange;'>Overall Feedback</h6>",
                    unsafe_allow_html=True,
                )
            with st.chat_message("assistant"):
                st.markdown("This functionality will be available in next release.")
326
+
327
+
328
# call the function to render the main web application
# (guarded so importing this module does not launch the app)
if __name__ == "__main__":
    run_web_app()
chatbot_functionalities/__init__.py ADDED
File without changes
chatbot_functionalities/answer_evaluation.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain import FewShotPromptTemplate
2
+ from chatbot_functionalities.llms import llm_inference
3
+
4
+
5
+ def evaluate_answer(
6
+ question: str,
7
+ answer: str,
8
+ position: str,
9
+ ) -> str:
10
+ """Call HuggingFace/OpenAI model for inference
11
+
12
+ Given a question,answer, and position , this function calls the relevant
13
+ API to fetch LLM inference results.
14
+
15
+ Args:
16
+ question: The generated question from our database
17
+ answer: answer given by the candidate
18
+ position: job position that the candidate applying for
19
+
20
+
21
+ Returns:
22
+ Rating: rating for candidate's answer .
23
+ qualitative_feedback : based on the candidate's answer and the given rating.
24
+
25
+ HuggingFace repo_id example:
26
+ - mistralai/Mistral-7B-Instruct-v0.1
27
+
28
+ """
29
+ if position == "Customer Service Representative":
30
+ #set up examples
31
+ examples = [
32
+ {
33
+ "position": f"""{position}""",
34
+ "question": """How can you improve a dissatisfied customer's experience?""",
35
+ "answer": """I've found the most successful strategy for turning an unhappy customer into a happy customer is by actively listening to what they're saying. Sometimes, customers just want you to listen to them, and they want to feel like the company cares about them and their opinions. \
36
+ For example, I once had a customer who got home to find there was only one shoe in their shoebox. They were quite upset, so I let them explain the issue and then I validated their feelings and provided them with a discount on the purchase along with the missing shoe. They left in a much better mood and became a loyal customer.""",
37
+ "Rating" : "Good",
38
+ #"qualitative_feedback": """The candidate's response is rated as 'Good.' The answer not only emphasizes the importance of active listening but also provides a specific and illustrative example to support the strategy. The candidate goes beyond general advice by recounting a real scenario where a customer faced an issue, demonstrating a practical application of the suggested approach. The mention of validating the customer's feelings and offering a discount, along with the missing shoe, shows a proactive and customer-focused problem-solving approach. This response indicates a strong understanding of customer service principles and an ability to apply them effectively in challenging situations, resulting in customer satisfaction and loyalty."""
39
+ },{
40
+ "position": f"""{position}""",
41
+ "question": """How can you improve a dissatisfied customer's experience?""",
42
+ "answer": """I've found the most successful strategy for turning an unhappy customer into a happy customer is by actively listening to what they're saying. Sometimes, customers just want you to listen to them, and they want to feel like the company cares about them and their opinions. """,
43
+ "Rating": "Average",
44
+ #"qualitative_feedback":"""The candidate's response is rated as 'Average.' While the answer acknowledges the importance of active listening, it lacks depth in providing a comprehensive strategy for improving a dissatisfied customer's experience. The candidate briefly mentions the significance of making the customer feel cared for, but there is a lack of specific actions or steps to address and resolve the customer's concerns. A stronger response could have included additional elements such as empathetic communication, prompt issue resolution, and, if applicable, offering appropriate compensation or solutions. The answer, though acknowledging a key aspect, falls short of providing a well-rounded and detailed approach to handling dissatisfied customers."""
45
+ },{
46
+ "position": f"""{position}""",
47
+ "question": """How can you improve a dissatisfied customer's experience?""",
48
+ "answer": " I was playing a game.",
49
+ "Rating" : "Poor",
50
+ #"qualitative_feedback": """The candidate's response is rated as 'Poor.' The answer provided does not address the question and appears to be irrelevant to the context of improving a dissatisfied customer's experience. It lacks any relevant information or insight into customer service strategies. A strong response should have focused on practical approaches, communication skills, and problem-solving methods to enhance the customer experience. The candidate's answer demonstrates a misunderstanding of the question and an inability to provide a relevant and thoughtful response."""
51
+ }
52
+ ]
53
+ elif position == "Nurse":
54
+ #set up examples
55
+ examples = [
56
+ {
57
+ "position": f"""{position}""",
58
+ "question": """ how do you handle the stress of the job ?""",
59
+ "answer": """I find the best way to handle the stress of the job is through meticulous organization and attention to detail. By making lists and prioritizing what needs to get done throughout my day I find that tasks which might seem overwhelming all at once are much more manageable. This also makes it possible for me to stay calm and remain focused on what needs to get done when unexpected situations arise.""",
60
+ "Rating" : "Good",
61
+ #"qualitative_feedback": """The candidate's response is rated as 'Good.' They provide a well-thought-out and practical approach to handling the stress of the nursing job. The emphasis on meticulous organization, attention to detail, and prioritization through making lists is a strong strategy for managing workload and preventing tasks from becoming overwhelming. The candidate's acknowledgment of the inevitability of unexpected situations and the ability to remain calm and focused in such scenarios demonstrates adaptability and resilience. Overall, the response showcases effective coping mechanisms that align with the demands of a nursing role, indicating a proactive and organized approach to stress management."""
62
+ },{
63
+ "position": f"""{position}""",
64
+ "question": """how do you handle the stress of the job ?""",
65
+ "answer": """I handle stress by focusing on the most important thing the care of the patient. I feel I owe it to my patients to stay calm and focused on them. """,
66
+ "Rating": "Average",
67
+ #"qualitative_feedback":"""The candidate's response is rated as 'Average.' While the answer acknowledges a strategy for handling stress by focusing on patient care, it lacks depth in providing additional coping mechanisms or self-care strategies. A more robust response could have included personal methods for maintaining work-life balance, seeking support from colleagues, or engaging in stress-relief activities outside of work. Additionally, the candidate could have elaborated on how maintaining focus on patient care contributes to their overall stress management. While the answer is acceptable, it falls slightly short of providing a more comprehensive understanding of the candidate's approach to handling stress in the nursing role."""
68
+ },{
69
+ "position": f"""{position}""",
70
+ "question": """ how do you handle the stress of the job ?""",
71
+ "answer": "I like a fast-paced pressure-filled environment that makes my job invigorating.",
72
+ "Rating" : "Poor",
73
+ #"qualitative_feedback": """The candidate's response is rated as 'Poor.' While expressing a preference for a fast-paced and pressure-filled environment can indicate adaptability, the answer lacks depth in addressing how the candidate actively manages and handles stress in the nursing job. A strong response would have included specific strategies or coping mechanisms, such as organization, prioritization, or self-care practices, to demonstrate a proactive approach to stress management. The current answer is vague and does not provide insight into the candidate's ability to handle the inherent stress of the nursing role, which is crucial for the position. A more detailed and focused response would have been more appropriate."""
74
+ }
75
+ ]
76
+ elif position == "Marketing Manager":
77
+ #set up examples
78
+ examples = [
79
+ {
80
+ "position": f"""{position}""",
81
+ "question": """Are you a team player? """,
82
+ "answer": """I am absolutely a team player. My perspective has always been that if my team succeeds, I succeed, and if I succeed, my team succeeds. I think work is a lot more fun when you're sharing your time and energy with people who want to raise each other up.""",
83
+ "Rating" : "Good",
84
+ #"qualitative_feedback": """The candidate's response is rated as 'Good.' They express a positive and collaborative attitude towards teamwork. The candidate emphasizes the mutual success of both individual and team, demonstrating an understanding of the interconnectedness of personal and team achievements. The mention of finding work more enjoyable when sharing time and energy with supportive team members adds a personal touch to the answer. Overall, the response conveys a strong commitment to teamwork and suggests that the candidate values a collaborative work environment, which is a positive trait for a Marketing Manager role."""
85
+ },{
86
+ "position": f"""{position}""",
87
+ "question": """Are rich snippets important for SEO ?""",
88
+ "answer": """"Having rich snippets can help search results stand out and increase the click-through rate. In the long run, it can positively affect page ranking, too.""",
89
+ "Rating": "Average",
90
+ #"qualitative_feedback":"""The candidate's response is rated as 'Average.' While the answer acknowledges the importance of rich snippets for SEO by mentioning that they can help search results stand out and increase click-through rates, it lacks depth in providing a more comprehensive explanation. A stronger response could have delved into the specific types of information that can be included in rich snippets, their impact on user engagement, and how they contribute to a better user experience. Additionally, the candidate could have elaborated on how search engines use rich snippets to understand the content better. The answer, though correct in recognizing the value of rich snippets, falls short of providing a more detailed and insightful response."""
91
+ },{
92
+ "position": f"""{position}""",
93
+ "question": """Can you discuss a time when a marketing campaign didn't perform as expected? How did you handle it, and what did you learn from the experience?""",
94
+ "answer": " I never had a campaign fail on me. All my campaigns were successful.",
95
+ "Rating" : "Poor",
96
+ #"qualitative_feedback": """The candidate's response is rated as 'Poor.' The answer lacks credibility and does not align with the reality of marketing, where not all campaigns are guaranteed to be successful. A more realistic and honest approach would have been to acknowledge that marketing campaigns can face challenges and share a specific instance where a campaign did not perform as expected. This would have provided an opportunity for the candidate to demonstrate problem-solving skills, adaptability, and the ability to learn from setbacks. The lack of humility and the claim that all campaigns were successful suggests a lack of transparency and self-awareness, which are crucial qualities for a Marketing Manager."""
97
+ }
98
+ ]
99
+ elif position == "Sales Manager":
100
+ #set up examples
101
+ examples = [
102
+ {
103
+ "position": f"""{position}""",
104
+ "question": """Why do you want the sales manager position?""",
105
+ "answer": """ I enjoyed what I read about this company and your products. I am ecstatic at the possibility of working for you. I love working with teams and helping to guide them to give it their all every day because that’s what I will do as the sales manager. I appreciate all the rave reviews about your products and want to help get your sales to the next level.\
106
+ In my previous job, I was promoted to start a new sales team and got to choose team members. I looked at everyone’s personalities, experiences, strengths and weaknesses to create a team that would balance each other. I know I can succeed as the sales manager for this company and want the opportunity to show you how I can help this company reach new heights.""",
107
+ "Rating" : "Good",
108
+ #"qualitative_feedback": """The candidate's response is rated as 'Good.' They provide a well-rounded answer that demonstrates genuine enthusiasm for the company and the sales manager position. The mention of enjoying what they read about the company and its products, along with expressing excitement at the possibility of working there, conveys a positive attitude. The candidate articulates a passion for working with teams and guiding them to excel, aligning with the responsibilities of a sales manager. Additionally, the mention of past success in starting a new sales team and strategically selecting team members showcases relevant experience and leadership skills. The candidate's commitment to contributing to the company's growth and taking it to the next level adds value to their response. Overall, the answer effectively communicates a strong interest in the position and the ability to make meaningful contributions to the sales team."""
109
+ },{
110
+ "position": f"""{position}""",
111
+ "question": """Why do you want the sales manager position?""",
112
+ "answer": """"I enjoyed what I read about this company and your products. I am ecstatic at the possibility of working for you. I love working with teams and helping to guide them to give it their all every day because that’s what I will do as the sales manager. I appreciate all the rave reviews about your products and want to help get your sales to the next level.\
113
+ """,
114
+ "Rating": "Average",
115
+ #"qualitative_feedback":"""The candidate's response is rated as 'Average.' While expressing excitement about the company and the products, the answer lacks specific details about the candidate's qualifications or experiences that make them suitable for the sales manager position. The mention of loving to work with teams and guide them is positive, but it could be enhanced by providing examples of past successes or leadership experiences in managing sales teams. Additionally, the candidate expresses a desire to help elevate sales but does not offer a clear strategy or insights into how they plan to achieve this goal. A stronger response would include more concrete details about the candidate's skills, experiences, and how they intend to contribute to the company's sales growth."""
116
+ },{
117
+ "position": f"""{position}""",
118
+ "question": """Why do you want the sales manager position?""",
119
+ "answer": " I enjoyed what I read about this company and your products.",
120
+ "Rating" : "Poor",
121
+ #"qualitative_feedback": """The candidate's response is rated as 'Poor.' The answer is overly brief and lacks substance. While expressing enjoyment about the company and its products is positive, it does not provide any meaningful insights into the candidate's qualifications, motivations, or specific reasons for wanting the sales manager position. A strong response would include details about the candidate's relevant skills, experiences, and how they plan to contribute to the success of the sales team. The current answer falls short of demonstrating a genuine interest in the role and does not convey a strong commitment to the position."""
122
+ }
123
+ ]
124
+ #position == "Medical Assistance"
125
+ else :
126
+ #set up examples
127
+ examples = [
128
+ {
129
+ "position": f"""{position}""",
130
+ "question": """Can you tell me about a time you overcame a difficult situation?""",
131
+ "answer": """ When I was working at the hospital, I communicated with an upset mother who insisted on being in the operating room with her son during his surgery. As this violated hospital rules, I knew I couldn't allow her in the room. Instead of becoming impatient with her, I tried to be empathetic about her situation. I understood she felt scared and didn't know about our safety procedures.\
132
+ I told her I understood her situation and knew she just wanted the best for her son. Next, I informed her politely of the hospital's policies and why they were in place, emphasizing that following them would help keep her son safe. I even promised to give her hourly updates, which comforted her and increased her trust in the medical team. She thanked me for speaking with her and providing great care for her son.""",
133
+ "Rating" : "Good",
134
+ #"qualitative_feedback": """The candidate's response is rated as 'Good.' They provide a detailed and well-structured example of overcoming a difficult situation in a medical setting. The candidate effectively demonstrates strong communication and empathy skills in dealing with an upset mother. They not only recognized and validated the mother's emotions but also explained the hospital's policies with empathy and understanding. The offer of hourly updates to comfort the mother and build trust in the medical team shows a proactive and patient-focused approach. Overall, the response showcases the candidate's ability to handle challenging situations with empathy, effective communication, and a commitment to patient care."""
135
+ },{
136
+ "position": f"""{position}""",
137
+ "question": """Can you tell me about a time you overcame a difficult situation?""",
138
+ "answer": """"When I was working at the hospital, I communicated with an upset mother who insisted on being in the operating room with her son during his surgery. As this violated hospital rules, I knew I couldn't allow her in the room. Instead of becoming impatient with her, I tried to be empathetic about her situation. I understood she felt scared and didn't know about our safety procedures.
139
+ """,
140
+ "Rating": "Average",
141
+ #"qualitative_feedback":"""The candidate's response is rated as 'Average.' While they provide a specific example of overcoming a difficult situation in a medical setting, the response lacks some depth. The candidate effectively communicates empathy and understanding towards the upset mother's situation, which is positive. However, the answer could be improved by providing more details about the resolution or outcome of the situation. Offering insights into how the candidate successfully navigated the violation of hospital rules, the mother's reaction to the explanation, or any additional steps taken would have added more substance to the response. Overall, while the answer is acceptable, there is room for enhancement in providing a more comprehensive account of the situation."""
142
+ },{
143
+ "position": f"""{position}""",
144
+ "question": """Can you tell me about a time you overcame a difficult situation?""",
145
+ "answer": " When I was working at the hospital, I communicated with an upset mother who insisted on being in the operating room with her son during his surgery.",
146
+ "Rating" : "Poor",
147
+ #"qualitative_feedback": """The candidate's response is rated as 'Poor.' While the candidate starts to describe a challenging situation involving an upset mother, the answer is incomplete and lacks necessary details. The response does not provide information on how the candidate handled the situation, what actions were taken, or the resolution of the problem. A strong answer to this question should include specific actions taken, the candidate's thought process, and the positive outcome or lessons learned from overcoming the difficult situation. In its current form, the response lacks the depth and completeness needed to showcase the candidate's problem-solving and interpersonal skills effectively."""
148
+ }
149
+ ]
150
+ #set up example_template
151
+ example_template = """
152
+ position: {position} .\
153
+ question: {question} \
154
+ answer: {answer}.\
155
+ Rating:{Rating}.\
156
+ """
157
+ #qualitative_feedback:{qualitative_feedback}.\
158
+
159
+ #set up example_prompt
160
+ example_prompt = PromptTemplate(
161
+ input_variables=["position", "question", "answer","Rating"],
162
+ template=example_template
163
+ )
164
+
165
+
166
+
167
+
168
+ # Set up prefix prompt
169
+ prefix = """
170
+ ### instruction: you are an experienced interviewer.\
171
+ You are interviewing a candidate for the position of {position} .\
172
+ You are tasked to rate an answer provided by the candidate. You should provide a categorical Rating and qualitative feedback.\
173
+ The categorical rating should be one of the following values: Good, average, or Poor.\
174
+ the qualitative feedback should provide sufficient details to justify the categorical rating.\
175
+ The position and the question asked to the candidate and the answer given by the candidate are given below.\
176
+ also some examples are given below.\
177
+ """
178
+ suffix = """
179
+ position : {position} .\
180
+ question : {question} \
181
+ answer : {answer}.\
182
+ qualitative_feedback:
183
+
184
+ """
185
+
186
+ few_shot_prompt_template = FewShotPromptTemplate(
187
+ examples=examples,
188
+ example_prompt=example_prompt,
189
+ prefix=prefix,
190
+ suffix=suffix,
191
+ input_variables=["position", "question", "answer"],
192
+ example_separator="\\\n\\\n" )
193
+
194
+
195
+ # send prompt to LLM using the common function
196
+ response = llm_inference(
197
+ model_type="huggingface",
198
+ input_variables_list=[ position, question, answer],
199
+ prompt_template=few_shot_prompt_template,
200
+ hf_repo_id="mistralai/Mistral-7B-Instruct-v0.1",
201
+ inference_type = "evaluation",
202
+ temperature=0.1,
203
+ max_length=32000,
204
+ )
205
+
206
+
207
+
208
+ return response
chatbot_functionalities/evaluate_answers.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import chromadb
4
+ from chatbot_functionalities.llms import llm_inference
5
+ from langchain.output_parsers import ResponseSchema
6
+ from langchain.output_parsers import StructuredOutputParser
7
+ from typing import List
8
+ from langchain.prompts import PromptTemplate
9
+ from langchain import FewShotPromptTemplate
10
+ from pathlib import Path
11
+
12
def evaluate_answer(
    question: str,
    answer: str,
    position: str,
    questions_collection: chromadb.Collection,
):
    """Rate a candidate's answer via few-shot LLM inference.

    Builds a few-shot prompt from curated question/answer examples (at most
    one example per rating category) and asks a HuggingFace-hosted model to
    rate the candidate's answer and justify the rating.

    Args:
        question: The interview question that was asked.
        answer: The answer given by the candidate.
        position: Job position the candidate is applying for.
        questions_collection: ChromaDB collection used to find curated
            questions semantically similar to `question`.

    Returns:
        A 2-tuple (rating, feedback). The first element is currently the
        literal string 'None' because the raw LLM response (second element)
        contains both the rating and the qualitative feedback.

    HuggingFace repo_id example:
        - mistralai/Mistral-7B-Instruct-v0.1
    """
    # Load the curated question/answer dataset that supplies few-shot examples.
    excel_file_path = (Path.cwd() / "data" / "processed" / "combined_dataset.xlsx").__str__()
    collected_q_a_df = pd.read_excel(excel_file_path, sheet_name='combined')
    collected_q_a_df.columns = [
        x.replace(" ", "_").lower().replace("/", "_or_") for x in collected_q_a_df.columns
    ]

    # Fetch the 3 curated questions most similar to the asked question for the
    # same position; their answers become the few-shot example pool.
    matching_questions = questions_collection.query(
        query_texts=[question],
        where={"position": {"$eq": position}},
        n_results=3,
    )
    matched_question_texts = matching_questions['documents'][0]

    # Collect at most one example per rating category.
    # BUG FIX: boolean masks replace DataFrame.query() f-strings — a position
    # or question containing a quote character previously broke (or could
    # inject into) the interpolated query expression.
    examples = []
    ratings_scope = ['Good', 'Average', 'Poor']
    for rating in ratings_scope:
        mask = (
            (collected_q_a_df['position_or_role'] == position)
            & (collected_q_a_df['question'].isin(matched_question_texts))
            & (collected_q_a_df['answer_quality'] == rating)
        )
        matching_rows = collected_q_a_df.loc[mask, ['question', 'answer']]
        if matching_rows.shape[0] > 0:
            examples.append(
                {
                    'position': position,
                    'question': question,
                    'answer': matching_rows.answer.iloc[0],
                    'Rating': rating,
                }
            )

    # Template used to render each few-shot example.
    example_template = """
    position: {position} .\
    question: {question} \
    answer: {answer}.\
    Rating:{Rating}.\
    """

    example_prompt = PromptTemplate(
        input_variables=["position", "question", "answer", "Rating"],
        template=example_template,
    )

    # Instruction preamble establishing the evaluator persona.
    prefix = """
    ### instruction: you are an experienced interviewer.\
    You are interviewing a candidate for the position of {position} .\
    You are tasked to rate an answer provided by the candidate. You should provide a categorical Rating and qualitative feedback.\
    The categorical rating should be one of the following values: Good, average, or Poor.\
    the qualitative feedback should provide sufficient details to justify the categorical rating.\
    The position and the question asked to the candidate and the answer given by the candidate are given below.\
    also some examples are given below.\
    """
    # Suffix carries the actual item to evaluate; the model continues after
    # "qualitative_feedback:".
    suffix = """
    position : {position} .\
    question : {question} \
    answer : {answer}.\
    qualitative_feedback:
    """

    few_shot_prompt_template = FewShotPromptTemplate(
        examples=examples,
        example_prompt=example_prompt,
        prefix=prefix,
        suffix=suffix,
        input_variables=["position", "question", "answer"],
        example_separator="\\\n\\\n",
    )

    # Delegate the actual call to the shared inference helper.
    response = llm_inference(
        model_type="huggingface",
        input_variables_list=[position, question, answer],
        prompt_template=few_shot_prompt_template,
        hf_repo_id="mistralai/Mistral-7B-Instruct-v0.1",
        inference_type="evaluation",
        temperature=0.1,
        max_length=32000,
    )

    # Rating is not parsed out of the response yet; callers store the raw
    # response text as feedback.
    return 'None', response
127
+
128
def evaluate_answer_obsolete(
    question: str,
    answer: str,
    position: str,
):
    """Rate a candidate's answer with a single zero-shot prompt (superseded).

    Older evaluation path kept for reference: it sends one structured prompt
    to a HuggingFace-hosted model and parses the response into a categorical
    rating plus qualitative feedback using langchain's structured output parser.

    Args:
        question: The interview question that was asked.
        answer: The answer given by the candidate.
        position: Job position the candidate is applying for.

    Returns:
        A 2-tuple (Rating, qualitative_feedback) parsed from the LLM output.

    HuggingFace repo_id example:
        - mistralai/Mistral-7B-Instruct-v0.1
    """
    # Schema describing the categorical rating field the LLM must return.
    rating_schema = ResponseSchema(name="Rating",
                                   description="it was the categorical value for the answer given by the candidate and this value could be poor, average or good. \
                                   ,the categorical value given by you as an experienced interviewer. \
                                   after asking a candidate a question related to the position he is applying for")

    # Schema describing the free-text justification field.
    feedback_schema = ResponseSchema(name="qualitative_feedback",
                                     description="the qualitative feedback is the sufficient details which is given by you as an Experienced interviewer. \
                                     the qualitative feedback is given after asking the candidate a question related to the position he is applying for, \
                                     and the candidate provided his answer. \
                                     the qualitative feedback should provide sufficient details to justify the categorical rating ")

    # Parser built from both schemas; it also supplies the format instructions
    # that are embedded into the prompt below.
    parser = StructuredOutputParser.from_response_schemas([rating_schema, feedback_schema])
    format_instructions = parser.get_format_instructions()

    # Zero-shot evaluation prompt.
    evaluation_prompt = (
        """### instruction: you are an experienced interviewer.\
        You are interviewing a candidate for the position of {position} .\
        You are tasked to rate an answer provided by the candidate. You should provide a categorical rating and qualitative_feedback.\
        The categorical rating should be one of the following values: Good, average, or Poor.\
        the qualitative_feedback should provide sufficient details to justify the categorical rating.\
        the format instructions of the output and the question asked to the candidate and the answer given by the candidate are given below.\
        ### format instruction: {format_instructions}.\
        ### question:{question}.\
        ### answer:{answer}.\
        ### Rating:
        """
    )

    # Run the evaluation through the shared HuggingFace inference helper.
    response = llm_inference(
        model_type="huggingface",
        input_variables_list=[position, format_instructions, question, answer],
        prompt_template=evaluation_prompt,
        hf_repo_id="mistralai/Mistral-7B-Instruct-v0.1",
        inference_type="evaluation",
        temperature=0.1,
        max_length=2024,
    )

    # Parse the raw response into the two declared fields.
    parsed_output = parser.parse(response)
    return parsed_output["Rating"], parsed_output["qualitative_feedback"]
202
+
203
def evaluate_all_answers(
    interview_history: pd.DataFrame,
    questions_collection: chromadb.Collection,
):
    """Evaluate every question/answer pair recorded during the mock interview.

    Mutates `interview_history` in place, filling the 'ratings' and 'feedback'
    columns one row at a time. Expected columns:
    ["question", "interview_phase", "position", "answer", "ratings", "feedback"].

    Args:
        interview_history: All questions asked in the mock interview together
            with the candidate's answers.
        questions_collection: ChromaDB collection passed through to the
            per-answer evaluator.
    """
    for idx in interview_history.index:
        record = interview_history.loc[idx]

        # Rate one question/answer pair with the LLM-backed evaluator.
        rating, feedback = evaluate_answer(
            question=record.question,
            answer=record.answer,
            position=record.position,
            questions_collection=questions_collection,
        )

        # Write the results back into the same row of the history frame.
        interview_history.loc[idx, ['ratings', 'feedback']] = [rating, feedback]
226
+
227
def get_ratings_for_answers(df: pd.DataFrame):
    """Fill the 'ratings' column with placeholder uniform random values in [0, 1)."""
    rng = np.random.default_rng()
    random_ratings = rng.uniform(low=0, high=1, size=[df.shape[0], 1])
    df.loc[:, 'ratings'] = random_ratings
230
+
231
def get_feedback_for_answers(df: pd.DataFrame):
    """Fill the 'feedback' column with a fixed placeholder string."""
    placeholder_text = 'Some Random Feedback'
    df.loc[:, 'feedback'] = placeholder_text
233
+
234
def get_overall_feedback():
    """Return placeholder overall-interview feedback text."""
    overall_feedback = 'Some Overall Feedback'
    return overall_feedback
chatbot_functionalities/generate_questions.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import chromadb
3
+ import re
4
+ from chatbot_functionalities.llms import llm_inference
5
+
6
+
7
def _parse_llm_question_list(raw_llm_output: str) -> list:
    """Turn raw LLM output into a clean list of question strings.

    Drops empty lines and strips any leading "<number>. " enumeration prefix
    that instruction-tuned models commonly prepend to each question.
    """
    non_empty_lines = [x for x in raw_llm_output.split("\n") if x != ""]
    enumeration_prefix = re.compile(r"^\d+\.\s")
    return [re.sub(enumeration_prefix, "", x) for x in non_empty_lines]


def generate_questions(
    position: str, candidate_profile: str, question_collection: chromadb.Collection
) -> pd.DataFrame:
    """This function will generate a set of relevant questions, given the candidate's position of choosing and their profile.

    Under the hood, it uses semantic search to extract the relevant questions from a vector database containing the
    embeddings of the question bank gathered as part of the project.

    If a semantic search match is not found based on the position or candidate profile, then an LLM will be used
    to generate a question for that particular interview phase.

    Args:
        position (str): Position of the candidate for which the interview is taking place.
        candidate_profile (str): Description of the profile of the candidate.
        question_collection (chromadb.Collection): Vector DB collection holding the curated question bank.

    Returns:
        pd.DataFrame: Pandas dataframe containing a list of all relevant questions generated, along with the interview phase and position.
    """
    # Result frame; 'answer', 'ratings' and 'feedback' are filled later in the app flow.
    question_df = pd.DataFrame(
        columns=["question", "interview_phase", "position", "answer", "ratings", "feedback"]
    )

    # Parallel lists that become the 'question' and 'interview_phase' columns;
    # they must stay the same length at all times.
    questions_list = []
    interview_phase_list = []

    # ------------------------------- #
    # -------INTRODUCTION PHASE------ #
    # ------------------------------- #

    print("Generating questions for introduction phase...\n")
    # Fetch introduction questions using semantic search.
    intro_ques_semantic_search = question_collection.query(
        query_texts=[candidate_profile],
        where={
            "$and": [
                {"position": {"$eq": position}},
                {"interview_phase": {"$eq": "Introduction"}},
            ]
        },
        n_results=2,
    )
    intro_semantic_docs = intro_ques_semantic_search["documents"][0]

    # If semantic search returned fewer than 2 introduction questions, ask the
    # LLM to fill the gap.
    if len(intro_semantic_docs) != 2:
        num_ques_to_gen = 2 - len(intro_semantic_docs)
        intro_template = """Assume you are an expert interviewer, interviewing a candidate. You have the following information:
        Position applying for : {position}
        Candidate profile summary : {candidate_profile}.
        Using the above information, generate {num_ques_to_gen} introductory question/questions which can help start off the interview. Please provide questions that are highly relevant for the job position only. Don't ask irrelevant questions."""

        intro_ques_llm = llm_inference(
            model_type="huggingface",
            input_variables_list=[position, candidate_profile, num_ques_to_gen],
            prompt_template=intro_template,
            hf_repo_id="tiiuae/falcon-7b-instruct",
            temperature=0.1,
            max_length=64,
        )
        intro_ques_llm_list = _parse_llm_question_list(intro_ques_llm)

        questions_list.extend(intro_ques_llm_list)
        questions_list.extend(intro_semantic_docs)
        # BUG FIX: label exactly as many phases as questions were actually
        # added. The LLM may return more or fewer lines than requested; the
        # original hard-coded 2 here, which could desynchronise the two
        # parallel lists and break the DataFrame assignment at the end.
        interview_phase_list.extend(
            ["Introduction"] * (len(intro_ques_llm_list) + len(intro_semantic_docs))
        )
    else:
        questions_list.extend(intro_semantic_docs)
        interview_phase_list.extend(["Introduction"] * len(intro_semantic_docs))

    print("Introduction phase question generation complete...\n")

    # ------------------------------- #
    # -----------CORE PHASE---------- #
    # ------------------------------- #

    print("Generating questions for core phase...\n")

    # Fetch core questions using semantic search (any phase that is neither
    # Introduction nor Conclusion).
    core_ques_semantic_search = question_collection.query(
        query_texts=[candidate_profile],
        where={
            "$and": [
                {"position": {"$eq": position}},
                {"interview_phase": {"$nin": ["Introduction", "Conclusion"]}},
            ]
        },
        n_results=4,
    )
    core_semantic_docs = core_ques_semantic_search["documents"][0]
    # Semantic hits keep their own phase label from the collection metadata.
    core_semantic_phases = [
        d["interview_phase"] for d in core_ques_semantic_search["metadatas"][0]
    ]

    # If semantic search returned fewer than 4 core questions, fall back to the LLM.
    if len(core_semantic_docs) != 4:
        num_ques_to_gen = 4 - len(core_semantic_docs)
        core_template = """Assume you are an expert interviewer, interviewing a candidate. You have the following information:
        Position applying for : {position}
        Candidate profile summary : {candidate_profile}.
        Using the above information, generate {num_ques_to_gen} position specific question/questions which can help start off the interview. Please provide questions that are highly relevant for the job position only. Don't ask irrelevant questions."""

        core_ques_llm = llm_inference(
            model_type="huggingface",
            input_variables_list=[position, candidate_profile, num_ques_to_gen],
            prompt_template=core_template,
            hf_repo_id="tiiuae/falcon-7b-instruct",
            temperature=0.1,
            max_length=64,
        )
        core_ques_llm_list = _parse_llm_question_list(core_ques_llm)

        questions_list.extend(core_ques_llm_list)
        # BUG FIX (as above): use the actual number of LLM-generated questions,
        # not the requested count, to keep the parallel lists aligned.
        interview_phase_list.extend(["Core"] * len(core_ques_llm_list))
        questions_list.extend(core_semantic_docs)
        interview_phase_list.extend(core_semantic_phases)
    else:
        questions_list.extend(core_semantic_docs)
        interview_phase_list.extend(core_semantic_phases)

    print("Core phase question generation complete...\n")

    # Add lists as columns to the DataFrame.
    question_df["question"] = questions_list
    question_df["interview_phase"] = interview_phase_list
    question_df["position"] = [position] * len(questions_list)

    return question_df
chatbot_functionalities/llms.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv, find_dotenv
2
+ from langchain.llms import HuggingFaceHub, OpenAI
3
+ from langchain.chains import LLMChain
4
+ from langchain.prompts import PromptTemplate
5
+ import warnings
6
+ from typing import List
7
+ from langchain import FewShotPromptTemplate
8
+
9
+
10
+ warnings.filterwarnings("ignore")
11
+
12
+
13
def _dispatch_chain(llm_chain, inference_type: str, input_variables_list: List[str]) -> str:
    """Run the chain with the keyword arguments expected for the task.

    'evaluation' prompts expect (position, question, answer); any other
    inference type means question generation and expects
    (position, candidate_profile, num_ques_to_gen).
    """
    if inference_type == "evaluation":
        return llm_chain.run(
            position=input_variables_list[0],
            question=input_variables_list[1],
            answer=input_variables_list[2],
        )
    return llm_chain.predict(
        position=input_variables_list[0],
        candidate_profile=input_variables_list[1],
        num_ques_to_gen=input_variables_list[2],
    )


def llm_inference(
    model_type: str,
    input_variables_list: List[str] = [],  # NOTE: never mutated here, so the shared default is safe; prefer None in new code
    prompt_template: str = "",
    openai_model_name: str = "",
    hf_repo_id: str = "",
    inference_type: str = "",
    temperature: float = 0.1,
    max_length: int = 64,
) -> str:
    """Call HuggingFace/OpenAI model for inference.

    Given a prompt template and its input values, this function calls the
    relevant API to fetch LLM inference results.

    Args:
        model_type: Denotes the LLM vendor's name. Can be either 'huggingface' or 'openai'.
        input_variables_list: Values for the prompt's input variables, in
            positional order (see _dispatch_chain for the expected order).
        prompt_template: A template string for the prompt, or — when
            inference_type == 'evaluation' — an already-built prompt object
            (e.g. a FewShotPromptTemplate).
        openai_model_name: OpenAI model name (used only when model_type == 'openai').
        hf_repo_id: The HuggingFace model's repo_id (used only when model_type == 'huggingface').
        inference_type: 'evaluation' to rate an answer provided by the
            candidate; anything else means question generation.
        temperature: The temperature of the sampling operation. 0 means always
            take the highest score; larger values approach uniform sampling.
        max_length: Maximum length in tokens of the generated output.

    Returns:
        A Python string which contains the inference result; an empty string
        when model_type is invalid.

    HuggingFace repo_id examples:
        - mistralai/Mistral-7B-Instruct-v0.1
        - google/flan-t5-xxl
        - tiiuae/falcon-7b-instruct
    """
    # Please ensure you have a .env file available with 'HUGGINGFACEHUB_API_TOKEN'
    # and 'OPENAI_API_KEY' values.
    load_dotenv(find_dotenv())

    if inference_type == "evaluation":
        # Evaluation callers pass a fully-built prompt object already.
        prompt = prompt_template
    else:
        prompt = PromptTemplate(
            template=prompt_template, input_variables=input_variables_list
        )

    # Build the vendor-specific LLM; the chain dispatch afterwards is shared
    # (the original duplicated the run/predict logic in both branches).
    if model_type == "openai":
        # https://api.python.langchain.com/en/stable/llms/langchain.llms.openai.OpenAI.html#langchain.llms.openai.OpenAI
        llm = OpenAI(
            model_name=openai_model_name, temperature=temperature, max_tokens=max_length
        )
    elif model_type == "huggingface":
        # https://python.langchain.com/docs/integrations/llms/huggingface_hub
        llm = HuggingFaceHub(
            repo_id=hf_repo_id,
            model_kwargs={"temperature": temperature, "max_length": max_length},
        )
    else:
        print(
            "Please use the correct value of model_type parameter: It can have a value of either openai or huggingface"
        )
        return ""

    llm_chain = LLMChain(prompt=prompt, llm=llm)
    return _dispatch_chain(llm_chain, inference_type, input_variables_list)
chatbot_functionalities/vectordb_operations.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv, find_dotenv
2
+ import pandas as pd
3
+ import os
4
+ import chromadb
5
+ from chromadb.utils import embedding_functions
6
+
7
+
8
def _snake_case_columns(columns) -> list:
    """Normalize column labels: spaces -> '_', lowercase, '/' -> '_or_'.

    Matches the attribute naming needed for itertuples() access elsewhere in
    the project (e.g. 'Position/Role' -> 'position_or_role').
    """
    return [x.replace(" ", "_").lower().replace("/", "_or_") for x in columns]


def generate_qa_vector_db(vdb_path: str, df: pd.DataFrame) -> None:
    """This function processes the dataframe into the required format, and then creates the following collections in a ChromaDB instance
    1. question_collection - Contains question embeddings, and the metadata as 'position' and 'interview_phase'
    2. answer_collection - Contains the answer embeddings. No metadata (yet).

    Args:
        vdb_path (str): Relative path of the location of the ChromaDB instance.
        df (pd.DataFrame): Question/answer dataset.
    """
    chroma_client = chromadb.PersistentClient(path=vdb_path)

    # Remote embedding function; requires HUGGINGFACEHUB_API_TOKEN in the environment.
    huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
        api_key=os.environ["HUGGINGFACEHUB_API_TOKEN"],
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )

    print("q_collection will be added")
    q_collection = chroma_client.create_collection(
        name="question_collection",
        metadata={"hnsw:space": "cosine"},  # cosine distance for semantic search
        embedding_function=huggingface_ef,
    )

    # Keep only question-related columns and drop duplicate questions.
    df_questions = df[
        ["Position/Role", "Question", "Interview Phase"]
    ].drop_duplicates()
    df_questions.columns = _snake_case_columns(df_questions.columns)

    q_documents = [row.question for row in df_questions.itertuples()]
    q_metadata = [
        {"position": row.position_or_role, "interview_phase": row.interview_phase}
        for row in df_questions.itertuples()
    ]
    # Ids reuse the original dataframe index, so documents/metadata/ids stay aligned.
    q_ids = ["q_id" + str(row.Index) for row in df_questions.itertuples()]

    q_collection.add(documents=q_documents, metadatas=q_metadata, ids=q_ids)
    print("q_collection added")

    print("a_collection will be added")
    a_collection = chroma_client.create_collection(
        name="answer_collection",
        metadata={"hnsw:space": "cosine"},
        embedding_function=huggingface_ef,
    )

    df_answers = df[["Answer", "Answer Quality"]]
    df_answers.columns = _snake_case_columns(df_answers.columns)

    a_documents = [row.answer for row in df_answers.itertuples()]
    a_metadata = [
        {"answer_quality": row.answer_quality} for row in df_answers.itertuples()
    ]
    a_ids = ["a_id" + str(row.Index) for row in df_answers.itertuples()]

    a_collection.add(documents=a_documents, ids=a_ids, metadatas=a_metadata)
    print("a_collection added")
    return None
72
+
73
+
74
def delete_collection_from_vector_db(vdb_path: str, collection_name: str) -> None:
    """Deletes a particular collection from the persistent ChromaDB instance.

    Args:
        vdb_path (str): Path of the persistent ChromaDB instance.
        collection_name (str): Name of the collection to be deleted.
    """
    client = chromadb.PersistentClient(path=vdb_path)
    client.delete_collection(collection_name)
    return None
84
+
85
+
86
def list_collections_from_vector_db(vdb_path: str) -> None:
    """Lists all the available collections from the persistent ChromaDB instance.

    Args:
        vdb_path (str): Path of the persistent ChromaDB instance.
    """
    client = chromadb.PersistentClient(path=vdb_path)
    available_collections = client.list_collections()
    print(available_collections)
94
+
95
+
96
def get_collection_from_vector_db(
    vdb_path: str, collection_name: str
) -> chromadb.Collection:
    """Fetches a particular ChromaDB collection object from the persistent ChromaDB instance.

    The collection is returned with the HuggingFace embedding function
    attached, so subsequent query() calls embed their query texts remotely.

    Args:
        vdb_path (str): Path of the persistent ChromaDB instance.
        collection_name (str): Name of the collection which needs to be retrieved.
    """
    # Load HUGGINGFACEHUB_API_TOKEN (and friends) from the project's .env file.
    load_dotenv(find_dotenv())

    client = chromadb.PersistentClient(path=vdb_path)
    embedding_fn = embedding_functions.HuggingFaceEmbeddingFunction(
        api_key=os.environ["HUGGINGFACEHUB_API_TOKEN"],
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )

    return client.get_collection(
        name=collection_name, embedding_function=embedding_fn
    )
data/chromadb/a48dcc07-8cd8-4e70-9baa-d501529cde2e/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a83ca203e832eb9230af291a3b445222fbdca949ab8645c0a87092a157169b6
3
+ size 1676000
data/chromadb/a48dcc07-8cd8-4e70-9baa-d501529cde2e/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff457caa817cdece6129e3f341d4bfdaf24563052863fb73d09d5c296604567b
3
+ size 100
data/chromadb/a48dcc07-8cd8-4e70-9baa-d501529cde2e/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7c830675a783d4b7aa489505e90e7b18e2c42f9181d88e6201d794adf313c99
3
+ size 27113
data/chromadb/a48dcc07-8cd8-4e70-9baa-d501529cde2e/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:194c16e8196c9846ec696980cd3b0fb73397df5a6354753da3555ef5736dcf0e
3
+ size 4000
data/chromadb/a48dcc07-8cd8-4e70-9baa-d501529cde2e/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56e93be3c1b7037804da4e20b50a159f99e9a93e0728dfa16360e93f9df614d7
3
+ size 8148
data/chromadb/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc6e9e6a51cad1d50a8c57546b811a05524609b61353d234be72aa5c6b084da5
3
+ size 13639680
data/chromadb/d75413bb-8f5a-4c3d-b905-2a017b87ba02/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d407af3134c0c7e7db8a8e5f6200033dfd450d4355069df65bd17ca04a3c95cb
3
+ size 3352000
data/chromadb/d75413bb-8f5a-4c3d-b905-2a017b87ba02/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:946da38a184ff153c8553398e912e9d8dada41b5907101c44c74098aa74e1eb0
3
+ size 100
data/chromadb/d75413bb-8f5a-4c3d-b905-2a017b87ba02/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31f6b0010521857c7b326c538e807f5b4a0d18ada6b7e9c6dc1ede31aa7f046e
3
+ size 57125
data/chromadb/d75413bb-8f5a-4c3d-b905-2a017b87ba02/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d59e1d669af55c9a54f56a9a8ea3a0e30b699640c1e86a9741278f66d5528677
3
+ size 8000
data/chromadb/d75413bb-8f5a-4c3d-b905-2a017b87ba02/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9cbb168a2ec48e3cf003796f17fd8149a05534e9f6c532a25e7ee9a5c23eec6
3
+ size 17316
data/originals/Customer Service Representative.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/originals/Final Dataset Team 3.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/originals/Team 2 Final Dataset.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/combined_dataset.xlsx ADDED
Binary file (358 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SpeechRecognition==3.10.1
2
+ streamlit-mic-recorder==0.0.4
3
+
4
+ pandas==2.1.4
5
+
6
+ python-dotenv==1.0.0
7
+
8
+ nltk==3.8.1
9
+ huggingface-hub==0.19.4
10
+ sentence-transformers==2.2.2
11
+
12
+ pysqlite3-binary
13
+ chromadb==0.4.20
14
+
15
+ tiktoken==0.5.2
16
+ openai==1.5.0
17
+
18
+ langchain==0.0.352