mikaelmp committed on
Commit
ebc3a7c
·
verified ·
1 Parent(s): 1bdef6a

BIG UPDATE: separate parse and validate methods

Browse files
researchsimulation/InteractiveInterviewChatbot.py CHANGED
@@ -21,35 +21,7 @@ def parse_question_with_llm(question, respondent_names, processor_llm):
21
 
22
  ### User Input:
23
  {question}
24
-
25
- ### Permitted Topics Scope:
26
- The respondents may only answer questions related to the following general topics:
27
-
28
- - Demographics: Age, location, education, family background, life events.
29
- - Values & Beliefs: Family responsibility, independence, hard work, gender equality, spirituality, simplicity, mental health, traditional vs modern values.
30
- - Career & Aspirations: Education, career goals, entrepreneurship, financial independence, stability, ambition, and personal development.
31
- - Influences & Role Models: Family members, mentors, public figures, influencers.
32
- - Interests & Hobbies: Sports, music, fitness, cooking, creative arts, gaming, travel, entertainment content, podcasts, leisure.
33
- - Health & Lifestyle: Physical health, fitness, diet, skincare, self-care, mental wellbeing, lifestyle balance.
34
- - Social Media & Technology: Social media usage, digital content, influencer interests, technology habits.
35
- - Personal Relationships: Family, friends, romantic relationships, support systems, social circles.
36
- - Future Outlook: Career plans, financial security, personal growth, family goals, confidence building.
37
- - Social & Societal Issues: Gender equality, societal expectations, economic issues, tradition vs freedom, social development.
38
- - Lifestyle Preferences: Food preferences, fashion, routines, spending habits, religious or cultural practices.
39
- - Personal Growth & Development: Maturity, emotional regulation, responsibility, adaptability, self-improvement, learning mindset.
40
-
41
- If a question is not strictly relevant to the topics, immediately return "INVALID" as the question instead of extracting a question.
42
-
43
- ### IMPORTANT RULES:
44
- - Only extract questions that fall within the **Permitted Topics Scope** above.
45
- - If the question is not strictly relevant to the Permitted Topics, return "INVALID" as the question instead of extracting a question.
46
- - For **each respondent**, if the question directed to them is out of scope, return "INVALID" as their question.
47
- - Even if one or more respondents are addressed by name, **do not answer** unless their question is within scope.
48
- - Do not infer or soften language to make it in-scope — the original question must already fit the topics.
49
- - Do not assume or infer intent beyond these boundaries.
50
- - When extracting the question, if any American English spelling, phrasing, or vocabulary is detected, automatically convert it into correct British English spelling, grammar, and usage before returning it.
51
- - Use standard British English conventions for all words (e.g. organise, behaviour, licence, travelling, programme, aeroplane, etc.).
52
- - DO NOT mirror the original spelling if it contains American English — always apply British spelling rules.
53
  ### Instructions:
54
  1. Identify **each respondent being addressed**.
55
  The respondents available are {respondent_names}. If these names are mistyped, then ensure that you match the names to the ones available.
@@ -58,9 +30,9 @@ def parse_question_with_llm(question, respondent_names, processor_llm):
58
  3. Ensure extracted questions **match the original structure**.
59
  4. If no respondent is explicitly addressed, return "General" as the respondent name.
60
  5. If the question is posed to all respondents, return "All" as the respondent name.
61
- 6. Ensure that you follow the formatting rules exactly. THIS IS EXTREMELY IMPORTANT.
62
 
63
- ### Formatting Rules:
64
  Provide the output strictly in this format:
65
  - Respondent: <Respondent Name>
66
  Question: <Extracted Question>
@@ -72,11 +44,6 @@ def parse_question_with_llm(question, respondent_names, processor_llm):
72
  response = processor_llm.invoke(prompt)
73
  chatgpt_output = response.content.strip()
74
  logging.info(f"LLM Parsed Output: {chatgpt_output}")
75
-
76
- # Handle out-of-scope detection
77
- if chatgpt_output.startswith("INVALID"):
78
- logging.warning("Question rejected: out of scope.")
79
- return None
80
 
81
  parsed_questions = {}
82
  respondent_name = "General"
@@ -94,6 +61,51 @@ def parse_question_with_llm(question, respondent_names, processor_llm):
94
 
95
  return parsed_questions
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  def ask_interview_question(respondent_agents_dict, last_active_agent, question, processor_llm):
98
  """
99
  Handles both individual and group interview questions while tracking conversation flow.
@@ -107,17 +119,21 @@ def ask_interview_question(respondent_agents_dict, last_active_agent, question,
107
  logging.info(f"Available respondents: {agent_names}")
108
  print(f"Available respondents: {agent_names}")
109
 
110
- # Use OpenAI LLM to parse question into individual respondent-specific sub-questions
 
 
111
  parsed_questions = parse_question_with_llm(question, str(agent_names), processor_llm)
112
-
113
  if not parsed_questions:
114
- logging.warning("No parsed questions returned. Exiting function.")
115
- return "**PreData Moderator**: No valid respondents were detected for this question."
116
 
117
- for resp_name, extracted_question in parsed_questions.items():
 
 
118
  if extracted_question == "INVALID":
119
- logging.warning("Invalid question detected during parsing.")
120
- return "**PreData Moderator**: The question is invalid. Please ask another question."
 
 
121
 
122
  if len(parsed_questions) > 1:
123
  logging.warning("More than one respondent specified. Exiting function.")
 
21
 
22
  ### User Input:
23
  {question}
24
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  ### Instructions:
26
  1. Identify **each respondent being addressed**.
27
  The respondents available are {respondent_names}. If these names are mistyped, then ensure that you match the names to the ones available.
 
30
  3. Ensure extracted questions **match the original structure**.
31
  4. If no respondent is explicitly addressed, return "General" as the respondent name.
32
  5. If the question is posed to all respondents, return "All" as the respondent name.
33
+ 6. Ensure that you follow the **Formatting Rules** exactly. THIS IS EXTREMELY IMPORTANT.
34
 
35
+ ### **Formatting Rules**:
36
  Provide the output strictly in this format:
37
  - Respondent: <Respondent Name>
38
  Question: <Extracted Question>
 
44
  response = processor_llm.invoke(prompt)
45
  chatgpt_output = response.content.strip()
46
  logging.info(f"LLM Parsed Output: {chatgpt_output}")
 
 
 
 
 
47
 
48
  parsed_questions = {}
49
  respondent_name = "General"
 
61
 
62
  return parsed_questions
63
 
64
def validate_question_topics(parsed_questions, processor_llm):
    """Validate each parsed question against the permitted topic scope.

    For every respondent/question pair, the LLM is asked to either return
    the literal string "INVALID" (question outside the allowed topics) or
    the same question rewritten in British English spelling and phrasing.

    Args:
        parsed_questions: Mapping of respondent name -> extracted question text.
        processor_llm: LLM client exposing ``invoke(prompt)`` and returning a
            response object with a ``.content`` string (LangChain-style).

    Returns:
        dict: Respondent name -> validated (British-English) question, or the
        string "INVALID" for out-of-scope questions.
    """

    def _validate(question):
        # One LLM round-trip per question; the whole topic scope is embedded
        # in the prompt so the model can judge relevance on its own.
        validation_prompt = f"""
You are a senior research analyst. Your job is to **validate** whether a market research question is within the allowed topic scope and convert it to **British English** spelling, grammar, and phrasing.

### Question:
{question}

### Permitted Topics Scope:
The respondents may only answer questions related to the following general topics:

- Demographics: Age, location, education, family background, life events.
- Values & Beliefs: Family responsibility, independence, hard work, gender equality, spirituality, simplicity, mental health, traditional vs modern values.
- Career & Aspirations: Education, career goals, entrepreneurship, financial independence, stability, ambition, and personal development.
- Influences & Role Models: Family members, mentors, public figures, influencers.
- Interests & Hobbies: Sports, music, fitness, cooking, creative arts, gaming, travel, entertainment content, podcasts, leisure.
- Health & Lifestyle: Physical health, fitness, diet, skincare, self-care, mental wellbeing, lifestyle balance.
- Social Media & Technology: Social media usage, digital content, influencer interests, technology habits.
- Personal Relationships: Family, friends, romantic relationships, support systems, social circles.
- Future Outlook: Career plans, financial security, personal growth, family goals, confidence building.
- Social & Societal Issues: Gender equality, societal expectations, economic issues, tradition vs freedom, social development.
- Lifestyle Preferences: Food preferences, fashion, routines, spending habits, religious or cultural practices.
- Personal Growth & Development: Maturity, emotional regulation, responsibility, adaptability, self-improvement, learning mindset.

### Validation Instructions:
- If the question is not strictly relevant to any of the above, return exactly: "INVALID"
- If valid, return the **same question**, rewritten in **British English** if necessary.

### Output:
<Validated question in British English, or "INVALID">
"""
        reply = processor_llm.invoke(validation_prompt)
        # Strip surrounding whitespace so downstream == "INVALID" checks work.
        return reply.content.strip()

    return {name: _validate(text) for name, text in parsed_questions.items()}
108
+
109
  def ask_interview_question(respondent_agents_dict, last_active_agent, question, processor_llm):
110
  """
111
  Handles both individual and group interview questions while tracking conversation flow.
 
119
  logging.info(f"Available respondents: {agent_names}")
120
  print(f"Available respondents: {agent_names}")
121
 
122
+ # Use OpenAI LLM to parse questions into individual respondent-specific sub-questions and validate them
123
+
124
+ # Step 1: Parse question
125
  parsed_questions = parse_question_with_llm(question, str(agent_names), processor_llm)
 
126
  if not parsed_questions:
127
+ return ["**PreData Moderator**: No valid respondents were detected for this question."]
 
128
 
129
+ # Step 2: Validate question content (scope + spelling)
130
+ validated_questions = validate_question_topics(parsed_questions, processor_llm)
131
+ for resp_name, extracted_question in validated_questions.items():
132
  if extracted_question == "INVALID":
133
+ return ["**PreData Moderator**: The question is invalid. Please ask another question."]
134
+
135
+ # Use validated questions from this point on
136
+ parsed_questions = validated_questions
137
 
138
  if len(parsed_questions) > 1:
139
  logging.warning("More than one respondent specified. Exiting function.")