Spaces:
Build error
Build error
BIG UPDATE: separate parse and validate methods
Browse files
researchsimulation/InteractiveInterviewChatbot.py
CHANGED
|
@@ -21,35 +21,7 @@ def parse_question_with_llm(question, respondent_names, processor_llm):
|
|
| 21 |
|
| 22 |
### User Input:
|
| 23 |
{question}
|
| 24 |
-
|
| 25 |
-
### Permitted Topics Scope:
|
| 26 |
-
The respondents may only answer questions related to the following general topics:
|
| 27 |
-
|
| 28 |
-
- Demographics: Age, location, education, family background, life events.
|
| 29 |
-
- Values & Beliefs: Family responsibility, independence, hard work, gender equality, spirituality, simplicity, mental health, traditional vs modern values.
|
| 30 |
-
- Career & Aspirations: Education, career goals, entrepreneurship, financial independence, stability, ambition, and personal development.
|
| 31 |
-
- Influences & Role Models: Family members, mentors, public figures, influencers.
|
| 32 |
-
- Interests & Hobbies: Sports, music, fitness, cooking, creative arts, gaming, travel, entertainment content, podcasts, leisure.
|
| 33 |
-
- Health & Lifestyle: Physical health, fitness, diet, skincare, self-care, mental wellbeing, lifestyle balance.
|
| 34 |
-
- Social Media & Technology: Social media usage, digital content, influencer interests, technology habits.
|
| 35 |
-
- Personal Relationships: Family, friends, romantic relationships, support systems, social circles.
|
| 36 |
-
- Future Outlook: Career plans, financial security, personal growth, family goals, confidence building.
|
| 37 |
-
- Social & Societal Issues: Gender equality, societal expectations, economic issues, tradition vs freedom, social development.
|
| 38 |
-
- Lifestyle Preferences: Food preferences, fashion, routines, spending habits, religious or cultural practices.
|
| 39 |
-
- Personal Growth & Development: Maturity, emotional regulation, responsibility, adaptability, self-improvement, learning mindset.
|
| 40 |
-
|
| 41 |
-
If a question is not strictly relevant to the topics, immediately return "INVALID" as the question instead of extracting a question.
|
| 42 |
-
|
| 43 |
-
### IMPORTANT RULES:
|
| 44 |
-
- Only extract questions that fall within the **Permitted Topics Scope** above.
|
| 45 |
-
- If the question is not strictly relevant to the Permitted Topics, return "INVALID" as the question instead of extracting a question.
|
| 46 |
-
- For **each respondent**, if the question directed to them is out of scope, return "INVALID" as their question.
|
| 47 |
-
- Even if one or more respondents are addressed by name, **do not answer** unless their question is within scope.
|
| 48 |
-
- Do not infer or soften language to make it in-scope — the original question must already fit the topics.
|
| 49 |
-
- Do not assume or infer intent beyond these boundaries.
|
| 50 |
-
- When extracting the question, if any American English spelling, phrasing, or vocabulary is detected, automatically convert it into correct British English spelling, grammar, and usage before returning it.
|
| 51 |
-
- Use standard British English conventions for all words (e.g. organise, behaviour, licence, travelling, programme, aeroplane, etc.).
|
| 52 |
-
- DO NOT mirror the original spelling if it contains American English — always apply British spelling rules.
|
| 53 |
### Instructions:
|
| 54 |
1. Identify **each respondent being addressed**.
|
| 55 |
The respondents available are {respondent_names}. If these names are mistyped, then ensure that you match the names to the ones available.
|
|
@@ -58,9 +30,9 @@ def parse_question_with_llm(question, respondent_names, processor_llm):
|
|
| 58 |
3. Ensure extracted questions **match the original structure**.
|
| 59 |
4. If no respondent is explicitly addressed, return "General" as the respondent name.
|
| 60 |
5. If the question is posed to all respondents, return "All" as the respondent name.
|
| 61 |
-
6. Ensure that you follow the
|
| 62 |
|
| 63 |
-
### Formatting Rules:
|
| 64 |
Provide the output strictly in this format:
|
| 65 |
- Respondent: <Respondent Name>
|
| 66 |
Question: <Extracted Question>
|
|
@@ -72,11 +44,6 @@ def parse_question_with_llm(question, respondent_names, processor_llm):
|
|
| 72 |
response = processor_llm.invoke(prompt)
|
| 73 |
chatgpt_output = response.content.strip()
|
| 74 |
logging.info(f"LLM Parsed Output: {chatgpt_output}")
|
| 75 |
-
|
| 76 |
-
# Handle out-of-scope detection
|
| 77 |
-
if chatgpt_output.startswith("INVALID"):
|
| 78 |
-
logging.warning("Question rejected: out of scope.")
|
| 79 |
-
return None
|
| 80 |
|
| 81 |
parsed_questions = {}
|
| 82 |
respondent_name = "General"
|
|
@@ -94,6 +61,51 @@ def parse_question_with_llm(question, respondent_names, processor_llm):
|
|
| 94 |
|
| 95 |
return parsed_questions
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
def ask_interview_question(respondent_agents_dict, last_active_agent, question, processor_llm):
|
| 98 |
"""
|
| 99 |
Handles both individual and group interview questions while tracking conversation flow.
|
|
@@ -107,17 +119,21 @@ def ask_interview_question(respondent_agents_dict, last_active_agent, question,
|
|
| 107 |
logging.info(f"Available respondents: {agent_names}")
|
| 108 |
print(f"Available respondents: {agent_names}")
|
| 109 |
|
| 110 |
-
# Use OpenAI LLM to parse
|
|
|
|
|
|
|
| 111 |
parsed_questions = parse_question_with_llm(question, str(agent_names), processor_llm)
|
| 112 |
-
|
| 113 |
if not parsed_questions:
|
| 114 |
-
|
| 115 |
-
return "**PreData Moderator**: No valid respondents were detected for this question."
|
| 116 |
|
| 117 |
-
|
|
|
|
|
|
|
| 118 |
if extracted_question == "INVALID":
|
| 119 |
-
|
| 120 |
-
|
|
|
|
|
|
|
| 121 |
|
| 122 |
if len(parsed_questions) > 1:
|
| 123 |
logging.warning("More than one respondent specified. Exiting function.")
|
|
|
|
| 21 |
|
| 22 |
### User Input:
|
| 23 |
{question}
|
| 24 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
### Instructions:
|
| 26 |
1. Identify **each respondent being addressed**.
|
| 27 |
The respondents available are {respondent_names}. If these names are mistyped, then ensure that you match the names to the ones available.
|
|
|
|
| 30 |
3. Ensure extracted questions **match the original structure**.
|
| 31 |
4. If no respondent is explicitly addressed, return "General" as the respondent name.
|
| 32 |
5. If the question is posed to all respondents, return "All" as the respondent name.
|
| 33 |
+
6. Ensure that you follow the **Formatting Rules** exactly. THIS IS EXTREMELY IMPORTANT.
|
| 34 |
|
| 35 |
+
### **Formatting Rules**:
|
| 36 |
Provide the output strictly in this format:
|
| 37 |
- Respondent: <Respondent Name>
|
| 38 |
Question: <Extracted Question>
|
|
|
|
| 44 |
response = processor_llm.invoke(prompt)
|
| 45 |
chatgpt_output = response.content.strip()
|
| 46 |
logging.info(f"LLM Parsed Output: {chatgpt_output}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
parsed_questions = {}
|
| 49 |
respondent_name = "General"
|
|
|
|
| 61 |
|
| 62 |
return parsed_questions
|
| 63 |
|
| 64 |
+
def validate_question_topics(parsed_questions, processor_llm):
    """
    Validate each parsed question against the permitted topic scope.

    For every (respondent, question) pair, the LLM is asked to either return
    the question rewritten in British English or to reject it as out of scope.

    Args:
        parsed_questions: Mapping of respondent name to the question text
            extracted for that respondent.
        processor_llm: Object exposing ``invoke(prompt)`` whose return value
            has a ``content`` string attribute.

    Returns:
        A dict with the same keys as ``parsed_questions``. Each value is the
        stripped LLM reply, or exactly ``"INVALID"`` when the LLM rejected the
        question.
    """
    validated_questions = {}

    for respondent, question in parsed_questions.items():
        prompt = f"""
        You are a senior research analyst. Your job is to **validate** whether a market research question is within the allowed topic scope and convert it to **British English** spelling, grammar, and phrasing.

        ### Question:
        {question}

        ### Permitted Topics Scope:
        The respondents may only answer questions related to the following general topics:

        - Demographics: Age, location, education, family background, life events.
        - Values & Beliefs: Family responsibility, independence, hard work, gender equality, spirituality, simplicity, mental health, traditional vs modern values.
        - Career & Aspirations: Education, career goals, entrepreneurship, financial independence, stability, ambition, and personal development.
        - Influences & Role Models: Family members, mentors, public figures, influencers.
        - Interests & Hobbies: Sports, music, fitness, cooking, creative arts, gaming, travel, entertainment content, podcasts, leisure.
        - Health & Lifestyle: Physical health, fitness, diet, skincare, self-care, mental wellbeing, lifestyle balance.
        - Social Media & Technology: Social media usage, digital content, influencer interests, technology habits.
        - Personal Relationships: Family, friends, romantic relationships, support systems, social circles.
        - Future Outlook: Career plans, financial security, personal growth, family goals, confidence building.
        - Social & Societal Issues: Gender equality, societal expectations, economic issues, tradition vs freedom, social development.
        - Lifestyle Preferences: Food preferences, fashion, routines, spending habits, religious or cultural practices.
        - Personal Growth & Development: Maturity, emotional regulation, responsibility, adaptability, self-improvement, learning mindset.

        ### Validation Instructions:
        - If the question is not strictly relevant to any of the above, return exactly: "INVALID"
        - If valid, return the **same question**, rewritten in **British English** if necessary.

        ### Output:
        <Validated question in British English, or "INVALID">
        """

        response = processor_llm.invoke(prompt)
        validated_output = response.content.strip()

        # The prompt shows the sentinel wrapped in quotes, so the model may
        # echo it back as '"INVALID"', 'Invalid' or 'INVALID.'. Callers compare
        # against the bare string "INVALID", so normalise those variants here.
        verdict = validated_output.strip("\"'`. \t\r\n")
        if verdict.casefold() == "invalid":
            validated_output = "INVALID"

        logging.info("Validated question for %s: %s", respondent, validated_output)
        validated_questions[respondent] = validated_output

    return validated_questions
|
| 108 |
+
|
| 109 |
def ask_interview_question(respondent_agents_dict, last_active_agent, question, processor_llm):
|
| 110 |
"""
|
| 111 |
Handles both individual and group interview questions while tracking conversation flow.
|
|
|
|
| 119 |
logging.info(f"Available respondents: {agent_names}")
|
| 120 |
print(f"Available respondents: {agent_names}")
|
| 121 |
|
| 122 |
+
# Use OpenAI LLM to parse questions into individual respondent-specific sub-questions and validate them
|
| 123 |
+
|
| 124 |
+
# Step 1: Parse question
|
| 125 |
parsed_questions = parse_question_with_llm(question, str(agent_names), processor_llm)
|
|
|
|
| 126 |
if not parsed_questions:
|
| 127 |
+
return ["**PreData Moderator**: No valid respondents were detected for this question."]
|
|
|
|
| 128 |
|
| 129 |
+
# Step 2: Validate question content (scope + spelling)
|
| 130 |
+
validated_questions = validate_question_topics(parsed_questions, processor_llm)
|
| 131 |
+
for resp_name, extracted_question in validated_questions.items():
|
| 132 |
if extracted_question == "INVALID":
|
| 133 |
+
return ["**PreData Moderator**: The question is invalid. Please ask another question."]
|
| 134 |
+
|
| 135 |
+
# Use validated questions from this point on
|
| 136 |
+
parsed_questions = validated_questions
|
| 137 |
|
| 138 |
if len(parsed_questions) > 1:
|
| 139 |
logging.warning("More than one respondent specified. Exiting function.")
|