# NOTE(review): removed page-scrape artifacts (Hugging Face Spaces banner, file
# size, line-number gutter, commit 5efc535) that were not part of the module.
import pandas as pd
import chromadb
import re
from chatbot_functionalities.llms import llm_inference
def generate_questions(
    position: str, candidate_profile: str, question_collection: chromadb.Collection
) -> pd.DataFrame:
    """Generate a set of relevant interview questions for a candidate.

    Questions are retrieved with semantic search from a vector database
    (ChromaDB collection) containing embeddings of the project's question
    bank. When the search returns fewer questions than required for a phase
    (2 for Introduction, 4 for Core), an LLM generates the shortfall.

    Args:
        position (str): Position of the candidate for which the interview is
            taking place. Used as an exact-match metadata filter.
        candidate_profile (str): Description of the candidate's profile,
            used as the semantic-search query text.
        question_collection (chromadb.Collection): Collection whose documents
            are questions and whose metadata carries "position" and
            "interview_phase" fields.

    Returns:
        pd.DataFrame: One row per question with columns "question",
        "interview_phase" and "position" populated, plus empty placeholder
        columns "answer", "ratings" and "feedback" for later stages.
    """
    # Compiled once: strips a leading "1. "-style enumeration that LLMs
    # commonly prepend to each generated question.
    enumeration_prefix = re.compile(r"^\d+\.\s")

    def _clean_llm_questions(raw_output: str, limit: int) -> list:
        """Split raw LLM text into at most `limit` non-empty, de-enumerated questions."""
        lines = [x for x in raw_output.split("\n") if x != ""]
        # Truncate to `limit` so the phase-label bookkeeping below stays
        # aligned even when the LLM returns more lines than requested.
        return [enumeration_prefix.sub("", x) for x in lines][:limit]

    # Parallel lists that become DataFrame columns at the end; they must
    # always have equal length.
    questions_list = []
    interview_phase_list = []

    # ------------------------------- #
    # -------INTRODUCTION PHASE------ #
    # ------------------------------- #
    print("Generating questions for introduction phase...\n")
    # Fetch introduction questions using semantic search.
    intro_ques_semantic_search = question_collection.query(
        query_texts=[candidate_profile],
        where={
            "$and": [
                {"position": {"$eq": position}},
                {"interview_phase": {"$eq": "Introduction"}},
            ]
        },
        n_results=2,
    )
    intro_docs = intro_ques_semantic_search["documents"][0]
    # If semantic search came up short, ask the LLM for the difference.
    if len(intro_docs) < 2:
        num_ques_to_gen = 2 - len(intro_docs)
        intro_template = """Assume you are an expert interviewer, interviewing a candidate. You have the following information:
Position applying for : {position}
Candidate profile summary : {candidate_profile}.
Using the above information, generate {num_ques_to_gen} introductory question/questions which can help start off the interview. Please provide questions that are highly relevant for the job position only. Don't ask irrelevant questions."""
        intro_ques_llm = llm_inference(
            model_type="huggingface",
            input_variables_list=[position, candidate_profile, num_ques_to_gen],
            prompt_template=intro_template,
            hf_repo_id="tiiuae/falcon-7b-instruct",
            temperature=0.1,
            max_length=64,
        )
        intro_ques_llm_list = _clean_llm_questions(intro_ques_llm, num_ques_to_gen)
        questions_list.extend(intro_ques_llm_list)
        # BUG FIX: label exactly as many phases as LLM questions were added
        # (the original always extended by 2, misaligning the columns when
        # the LLM returned a different number of lines).
        interview_phase_list.extend(["Introduction"] * len(intro_ques_llm_list))
    questions_list.extend(intro_docs)
    interview_phase_list.extend(["Introduction"] * len(intro_docs))
    print("Introduction phase question generation complete...\n")

    # ------------------------------- #
    # -----------CORE PHASE---------- #
    # ------------------------------- #
    print("Generating questions for core phase...\n")
    # Core questions are every phase except Introduction and Conclusion.
    core_ques_semantic_search = question_collection.query(
        query_texts=[candidate_profile],
        where={
            "$and": [
                {"position": {"$eq": position}},
                {"interview_phase": {"$nin": ["Introduction", "Conclusion"]}},
            ]
        },
        n_results=4,
    )
    core_docs = core_ques_semantic_search["documents"][0]
    core_metas = core_ques_semantic_search["metadatas"][0]
    if len(core_docs) < 4:
        num_ques_to_gen = 4 - len(core_docs)
        core_template = """Assume you are an expert interviewer, interviewing a candidate. You have the following information:
Position applying for : {position}
Candidate profile summary : {candidate_profile}.
Using the above information, generate {num_ques_to_gen} position specific question/questions which can help start off the interview. Please provide questions that are highly relevant for the job position only. Don't ask irrelevant questions."""
        core_ques_llm = llm_inference(
            model_type="huggingface",
            input_variables_list=[position, candidate_profile, num_ques_to_gen],
            prompt_template=core_template,
            hf_repo_id="tiiuae/falcon-7b-instruct",
            temperature=0.1,
            max_length=64,
        )
        core_ques_llm_list = _clean_llm_questions(core_ques_llm, num_ques_to_gen)
        questions_list.extend(core_ques_llm_list)
        # BUG FIX: extend by the number of questions actually added, not by
        # num_ques_to_gen, which can differ from the LLM's line count.
        interview_phase_list.extend(["Core"] * len(core_ques_llm_list))
    # Semantic-search hits carry their true phase in the metadata.
    questions_list.extend(core_docs)
    interview_phase_list.extend([d["interview_phase"] for d in core_metas])
    print("Core phase question generation complete...\n")

    # Build the frame in one aligned constructor call; the trailing columns
    # stay empty (NaN) placeholders exactly as before.
    return pd.DataFrame(
        {
            "question": questions_list,
            "interview_phase": interview_phase_list,
            "position": [position] * len(questions_list),
        },
        columns=["question", "interview_phase", "position", "answer", "ratings", "feedback"],
    )