File size: 7,619 Bytes
5efc535
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import pandas as pd
import chromadb
import re
from chatbot_functionalities.llms import llm_inference


# Number of questions each interview phase should contribute.
_NUM_INTRO_QUESTIONS = 2
_NUM_CORE_QUESTIONS = 4

# Leading list marker such as "1. " found at the start of a generated line.
_NUMBERING_PATTERN = re.compile(r"^\d+\.\s")


def _parse_llm_questions(raw_output: str, max_questions: int) -> list:
    """Split raw LLM text into at most ``max_questions`` clean question strings.

    Blank / whitespace-only lines are dropped, surrounding whitespace is
    trimmed and a leading "<number>. " list marker is removed from each line.
    """
    stripped_lines = (line.strip() for line in raw_output.split("\n"))
    questions = [_NUMBERING_PATTERN.sub("", line) for line in stripped_lines if line]
    # The model is free to return more lines than requested; cap the result so
    # callers can rely on never receiving more than they asked for.
    return questions[: max(max_questions, 0)]


def _query_question_bank(
    question_collection, position: str, candidate_profile: str, phase_filter: dict, n_results: int
) -> dict:
    """Run a semantic search restricted to ``position`` and the given interview-phase filter."""
    return question_collection.query(
        query_texts=[candidate_profile],
        where={
            "$and": [
                {"position": {"$eq": position}},
                {"interview_phase": phase_filter},
            ]
        },
        n_results=n_results,
    )


def _generate_llm_questions(
    position: str, candidate_profile: str, num_ques_to_gen: int, prompt_template: str
) -> list:
    """Ask the LLM for ``num_ques_to_gen`` questions and return them as a cleaned list."""
    # NOTE(review): llm_inference is assumed to return the raw generated text
    # as a single string (the code splits it on newlines) - confirm.
    raw_output = llm_inference(
        model_type="huggingface",
        input_variables_list=[position, candidate_profile, num_ques_to_gen],
        prompt_template=prompt_template,
        hf_repo_id="tiiuae/falcon-7b-instruct",
        temperature=0.1,
        max_length=64,
    )
    return _parse_llm_questions(raw_output, num_ques_to_gen)


def generate_questions(
    position: str, candidate_profile: str, question_collection: chromadb.Collection
) -> pd.DataFrame:
    """Generate a set of relevant interview questions for a position and candidate profile.

    Questions are first retrieved by semantic search over the question bank
    stored in ``question_collection``, filtered by position and interview
    phase. If the search returns fewer questions than a phase needs (2 for
    the introduction phase, 4 for the core phase), an LLM is asked to generate
    the missing ones. Generated questions are placed before the retrieved
    ones within each phase.

    Args:
        position (str): Position of the candidate for which the interview is taking place.
        candidate_profile (str): Description of the profile of the candidate.
        question_collection (chromadb.Collection): Vector-database collection holding the
            question bank; queried with ``position`` and ``interview_phase`` metadata filters.

    Returns:
        pd.DataFrame: One row per question with the columns "question",
        "interview_phase" and "position" filled in. The columns "answer",
        "ratings" and "feedback" exist but are not populated by this function.
    """

    # Instantiate an empty pandas DataFrame.
    question_df = pd.DataFrame(
        columns=["question", "interview_phase", "position", "answer", "ratings", "feedback"]
    )

    # Parallel lists that become dataframe columns at the end. They must stay
    # the same length, so every extend of one is paired with an extend of the
    # other using the actual number of items added.
    questions_list = []
    interview_phase_list = []

    # ------------------------------- #
    # -------INTRODUCTION PHASE------ #
    # ------------------------------- #

    print("Generating questions for introduction phase...\n")
    # Fetch introduction questions using semantic search
    intro_ques_semantic_search = _query_question_bank(
        question_collection,
        position,
        candidate_profile,
        {"$eq": "Introduction"},
        _NUM_INTRO_QUESTIONS,
    )
    intro_found = intro_ques_semantic_search["documents"][0]

    # Fall back to the LLM only for the questions the search could not supply.
    num_ques_to_gen = _NUM_INTRO_QUESTIONS - len(intro_found)
    if num_ques_to_gen > 0:
        intro_template = """Assume you are an expert interviewer, interviewing a candidate. You have the following information:
        Position applying for : {position}
        Candidate profile summary : {candidate_profile}.
        Using the above information, generate {num_ques_to_gen} introductory question/questions which can help start off the interview. Please provide questions that are highly relevant for the job position only. Don't ask irrelevant questions."""

        intro_ques_llm_list = _generate_llm_questions(
            position, candidate_profile, num_ques_to_gen, intro_template
        )
        questions_list.extend(intro_ques_llm_list)
        # Label by the number of questions actually produced, which may be
        # lower than requested.
        interview_phase_list.extend(["Introduction"] * len(intro_ques_llm_list))

    questions_list.extend(intro_found)
    interview_phase_list.extend(["Introduction"] * len(intro_found))

    print("Introduction phase question generation complete...\n")

    # ------------------------------- #
    # -----------CORE PHASE---------- #
    # ------------------------------- #

    print("Generating questions for core phase...\n")

    # Fetch core questions using semantic search: every phase other than
    # the introduction and conclusion counts as "core".
    core_ques_semantic_search = _query_question_bank(
        question_collection,
        position,
        candidate_profile,
        {"$nin": ["Introduction", "Conclusion"]},
        _NUM_CORE_QUESTIONS,
    )
    core_found = core_ques_semantic_search["documents"][0]

    num_ques_to_gen = _NUM_CORE_QUESTIONS - len(core_found)
    if num_ques_to_gen > 0:
        core_template = """Assume you are an expert interviewer, interviewing a candidate. You have the following information:
        Position applying for : {position}
        Candidate profile summary : {candidate_profile}.
        Using the above information, generate {num_ques_to_gen} position specific question/questions which can help start off the interview. Please provide questions that are highly relevant for the job position only. Don't ask irrelevant questions."""

        core_ques_llm_list = _generate_llm_questions(
            position, candidate_profile, num_ques_to_gen, core_template
        )
        questions_list.extend(core_ques_llm_list)
        # Generated questions carry no metadata, so they get the generic "Core" label.
        interview_phase_list.extend(["Core"] * len(core_ques_llm_list))

    questions_list.extend(core_found)
    # Retrieved questions keep the phase recorded in their own metadata.
    interview_phase_list.extend(
        [d["interview_phase"] for d in core_ques_semantic_search["metadatas"][0]]
    )

    print("Core phase question generation complete...\n")

    # Add lists as columns to the Dataframe.
    question_df["question"] = questions_list
    question_df["interview_phase"] = interview_phase_list
    question_df["position"] = [position] * len(questions_list)

    return question_df