mikaelmp committed on
Commit
fbc8125
·
verified ·
1 Parent(s): 1337d24

Splitting trial

Browse files
researchsimulation/InteractiveInterviewChatbot.py CHANGED
@@ -207,249 +207,153 @@ def validate_question_topics(parsed_questions, processor_llm):
207
 
208
 
209
  def ask_interview_question(respondent_agents_dict, last_active_agent, question, processor_llm):
210
- """
211
- Handles both individual and group interview questions while tracking conversation flow.
212
- Uses OpenAI's LLM to extract the intended respondent(s) and their specific question(s).
213
- Uses Groq's LLM for response generation.
214
- """
215
-
216
- logging.info(f"START: Processing new interview question: {question}")
217
- responses = []
218
-
219
  agent_names = list(respondent_agents_dict.keys())
220
- logging.info(f"Available respondents: {agent_names}")
221
- print(f"Available respondents: {agent_names}")
222
 
223
- # Use OpenAI LLM to parse questions into individual respondent-specific sub-questions and validate them
224
-
225
- # Step 1: Parse question
226
- logging.info("STEP 1: Parsing question with LLM...")
227
  parsed_questions = parse_question_with_llm(question, str(agent_names), processor_llm)
228
- logging.info(f"Parsed Questions Output: {parsed_questions}")
229
-
230
  if not parsed_questions:
231
- logging.warning("No questions were parsed from input.")
232
  return ["**PreData Moderator**: No valid respondents were detected for this question."]
233
 
234
- # Step 2: Validate question content (scope + spelling)
235
- logging.info("STEP 2: Validating questions for topic relevance and British English...")
236
  validated_questions = validate_question_topics(parsed_questions, processor_llm)
237
- logging.info(f"Validated Questions: {validated_questions}")
238
-
239
- for resp_name, extracted_question in validated_questions.items():
240
- if extracted_question == "INVALID":
241
- logging.warning(f"Invalid question detected for {resp_name}: {extracted_question}")
242
  return ["**PreData Moderator**: The question is invalid. Please ask another question."]
243
-
244
- # Use validated questions from this point on
245
  parsed_questions = validated_questions
246
- logging.info(f"Validated questions: {parsed_questions}")
247
-
248
- if len(parsed_questions) > 1:
249
- logging.warning("More than one respondent specified. Exiting function.")
250
- return "**PreData Moderator**: Please ask each respondent one question at a time."
251
- else:
252
- print(f"Parsed questions are: {parsed_questions}")
253
-
254
- if "General" in parsed_questions:
255
- if "General" in parsed_questions:
256
- if isinstance(last_active_agent, list) and all(name in agent_names for name in last_active_agent):
257
- logging.info(f"General case detected. Continuing with last active agent: {last_active_agent}")
258
- parsed_questions = {name: parsed_questions["General"] for name in last_active_agent}
259
- else:
260
- logging.info("General case detected without a valid previous active agent. Assigning question to all respondents.")
261
- parsed_questions = {name: parsed_questions["General"] for name in agent_names}
262
- elif "All" in parsed_questions:
263
- logging.info("All case detected. Assigning question to all respondents.")
264
- validated_question = parsed_questions["All"]
265
- parsed_questions = {name: validated_question for name in agent_names}
266
-
267
-
268
 
 
 
269
 
270
  last_active_agent = list(parsed_questions.keys())
271
- logging.info(f"Final parsed questions: {parsed_questions}")
272
-
273
- # Construct one crew and task for each agent and question
274
  responses = []
275
 
276
  for agent_name, agent_question in parsed_questions.items():
277
- if agent_name not in respondent_agents_dict:
278
- logging.warning(f"No valid respondent found for {agent_name}. Skipping.")
279
  responses.append(f"**PreData Moderator**: {agent_name} is not a valid respondent.")
280
  continue
281
 
282
- respondent_agent = respondent_agents_dict[agent_name].get_agent()
283
- user_profile = respondent_agents_dict[agent_name].get_user_profile()
284
-
285
- # communication_style = user_profile.get_field("Communication", "Style")
286
- communication_style = ""
287
-
288
- question_task_description = f"""
289
- You are {agent_name}. You are responding to a market research interview question. Your response must strictly follow the *style and tone* and *Hard Rules – You Must Follow These Without Exception* outlined below.
290
- ---
291
- ### *Communication Profile Reference:*
292
- - **Style:** {user_profile.get_field('Communication', 'Style')}
293
- - **Tone:** {user_profile.get_field('Communication', 'Tone')}
294
- - **Length:** {user_profile.get_field('Communication', 'Length')}
295
- - **Topics:** {user_profile.get_field('Communication', 'Topics')}
296
- ---
297
- ---
298
- ### 🔒 **Hard Rules – You Must Follow These Without Exception**
299
- - You must answer **only the question(s)** that are **explicitly asked**.
300
- - **Never provide extra information** beyond what was asked.
301
- - Keep your response **as short as possible** while still sounding natural and complete.
302
- - Do **not infer or assume** what the user *might* want — only respond to what they *actually* asked.
303
- - If multiple questions are asked, respond to **each one briefly**, and **nothing else**.
304
- - If the question is vague, respond minimally and only within that scope.
305
- -Give concise answers, whether the question is asked to the group or individually.
306
- -For factual or demographic questions (e.g., age, gender, location, housing), keep responses brief and to the point, without extra commentary.
307
- -Do not add any explanations, opinions, or additional information.
308
- -Use simple, clear sentences.
309
- -Example:
310
- Q: Where are you from?
311
- A: I’m from [city], [country](DO NOT ADD ANY EXTRA COMMENTS).
312
- -For reflective or opinion-based questions (e.g., feelings, preferences, motivations), provide thoughtful but still clear and focused answers.
313
- -Never repeat the question or add unrelated background information.
314
- ---
315
- ### **How to Answer:**
316
- - Your response should be **natural, authentic, and fully aligned** with the specified style and tone.
317
- - Ensure the answer is **clear, engaging, and directly relevant** to the question.
318
- - Adapt your **sentence structure, phrasing, and word choices** to match the intended communication style.
319
- - If applicable, incorporate **culturally relevant expressions, regional nuances, or industry-specific terminology** that fit the given tone.
320
- - **Adjust response length** based on the tone—**concise and direct** for casual styles, **structured and detailed** for professional styles.
321
- - **Always answer in first person ("I", "my", "me", "mine", etc.) as if you are personally responding to the question. You are an individual representing yourself, not speaking in third person.**
322
- -Always answer as if you are the individual being directly spoken to. Use first-person language such as “I,” “me,” “my,” and “mine” in every response. Imagine you are having a real conversation — your tone should feel natural, personal, and authentic. Do not refer to yourself in the third person (e.g., “She is from Trichy” or “Meena likes…”). Avoid describing yourself as if someone else is talking about you.
323
- -Everything you say should come from your own perspective, just like you would in everyday speech. The goal is to sound human, relatable, and direct — like you're truly present in the conversation.
324
- ---
325
- ### **Guidelines for Ensuring Authenticity & Alignment:**
326
- - **Consistency**: Maintain the same tone throughout the response.
327
- - **Authenticity**: The response should feel natural and match the speaker’s persona.
328
- - **Avoid Overgeneralisation**: Ensure responses are specific and not overly generic or robotic.
329
- - **Cultural & Linguistic Relevance**: Adapt language and references to match the speaker’s background, industry, or region where appropriate.
330
- - **Strict British Spelling & Grammar**:
331
- - All responses must use correct British English spelling, grammar, and usage, **irrespective of how the question is phrased**.
332
- - You must not mirror any American spelling, terminology, or phrasing found in the input question.
333
- - Where there are regional variations (e.g. 'licence' vs 'license', 'programme' vs 'program', 'aeroplane' vs 'airplane'), always default to the standard British form.
334
- - Examples:
335
- - **Correct (British):** organised, prioritise, minimise, realise, behaviour, centre, defence, travelling, practise (verb), licence (noun), programme, aeroplane.
336
- - **Incorrect (American):** organized, prioritize, minimize, realize, behavior, center, defense, traveling, practice (verb and noun), license (noun), program, airplane.
337
- - **Formatting**:
338
- - If the tone is informal, allow a conversational flow that mirrors natural speech.
339
- - If the tone is formal, use a structured and professional format.
340
- - **Do not include emojis or hashtags in the response.**
341
- - Maintain **narrative and thematic consistency** across all answers to simulate a coherent personality.
342
- -**Personality Profile Alignment:**
343
- -Consider your assigned personality traits across these dimensions:
344
- -Big Five Traits:
345
- -Openness: Reflect your level of curiosity, creativity, and openness to new experiences
346
- -Conscientiousness: Show your degree of organization, responsibility, and planning
347
- -Extraversion: Express your sociability and energy level in interactions
348
- -Agreeableness: Demonstrate your warmth, cooperation, and consideration for others
349
- -Neuroticism: Consider your emotional stability and stress response
350
- -Values and Priorities:
351
- -Achievement Orientation: Show your drive for success and goal-setting approach
352
- -Risk Tolerance: Express your comfort with uncertainty and change
353
- -Traditional Values: Reflect your adherence to conventional norms and practices
354
- -Communication Style:
355
- -Detail Orientation: Demonstrate your preference for specific vs. general information
356
- -Complexity: Show your comfort with nuanced vs. straightforward explanations
357
- -Directness: Express your communication as either straightforward or diplomatic
358
- -Emotional Expressiveness: Reflect your tendency to share or withhold emotions
359
- -Your responses must consistently align with these personality traits from your profile.
360
- ---
361
- ### **Example Responses (for Different Styles & Tones)**
362
- #### **Casual & Conversational Tone**
363
- **Question:** "How do you stay updated on the latest fashion and tech trends?"
364
- **Correct Response:**
365
- "I keep up with trends by following influencers on Instagram and watching product reviews on YouTube. Brands like Noise and Boat always drop stylish, affordable options, so I make sure to stay ahead of the curve."
366
- #### **Formal & Professional Tone**
367
- **Question:** "How do you stay updated on the latest fashion and tech trends?"
368
- **Correct Response:**
369
- "I actively follow industry trends by reading reports, attending webinars, and engaging with thought leaders on LinkedIn. I also keep up with global fashion and technology updates through leading publications such as *The Business of Fashion* and *TechCrunch*."
370
- ---
371
- Your final answer should be **a well-structured response that directly answers the question while maintaining the specified style and tone**:
372
- **"{agent_question}"**
373
- """
374
 
375
- question_task_expected_output = f"""
376
- A culturally authentic and conversational response to the question: '{agent_question}'.
377
- - The response must reflect the respondent's **local cultural background and geographic influences**, ensuring it aligns with their **speech patterns, preferences, and linguistic style**.
378
- - The language must follow **strict British English spelling conventions**, ensuring it is **natural, personal, and free-flowing**, while strictly avoiding American spelling, phrasing, or grammar under any circumstances, regardless of the spelling, grammar, or vocabulary used in the input question.
379
- - The response **must not introduce the respondent**, nor include placeholders like "[Your Name]" or "[Brand Name]".
380
- - The response **must always be written in first person ("I", "my", "me", etc.) as if the respondent is personally answering the question directly. Third-person narration is never allowed.**
381
- - The final output should be a **single, well-structured paragraph that directly answers the question** while staying fully aligned with the specified communication style.
382
- """
383
-
384
- question_task = Task(
385
- description=question_task_description,
386
- expected_output=question_task_expected_output,
387
- agent=respondent_agent
388
- )
389
-
390
- logging.debug(f"Created task for agent '{agent_name}' with description: {question_task_description}")
391
-
392
- # Log before starting task execution
393
- logging.info(f"Executing task for agent '{agent_name}'")
394
-
395
- # Create a new crew for each agent-question pair
396
- crew = Crew(
397
- agents=[respondent_agent],
398
- tasks=[question_task],
399
- process=Process.sequential
400
  )
401
- logging.debug(f"Crew initialized for agent '{agent_name}' with 1 task and sequential process")
402
-
403
- max_attempts = 3
404
- attempt = 0
405
- validated = False
406
- validated_answer = None
407
- while attempt < max_attempts and not validated:
408
- try:
409
- logging.info(f"Starting Response validation attempt {attempt+1} for agent '{agent_name}'")
410
- crew_output = crew.kickoff()
411
- logging.info(f"Task execution completed for agent '{agent_name}' (attempt {attempt+1})")
412
- task_output = question_task.output
413
- logging.debug(f"Raw output from agent '{agent_name}': {getattr(task_output, 'raw', str(task_output))}")
414
- answer = task_output.raw if hasattr(task_output, 'raw') else str(task_output)
415
- logging.info(f"Validating response for agent '{agent_name}' (attempt {attempt+1}): {answer}")
416
- # Validate the response using validate_response from validation_utils
417
- is_valid = validate_response(
418
- question=agent_question,
419
- answer=answer,
420
- user_profile_str=str(user_profile),
421
- fast_facts_str="",
422
- interview_transcript_text="",
423
- respondent_type=agent_name,
424
- ai_evaluator_agent=None,
425
- processor_llm=processor_llm
426
- )
427
- logging.info(f"Response Validation result for agent '{agent_name}' (attempt {attempt+1}): {is_valid}")
428
- if is_valid:
429
- validated = True
430
- validated_answer = answer
431
- logging.info(f"Response for agent '{agent_name}' passed validation on attempt {attempt+1}")
432
- break
433
- else:
434
- attempt += 1
435
- logging.warning(f"Response failed response validation for agent '{agent_name}' (attempt {attempt}). Retrying...")
436
- except Exception as e:
437
- logging.error(f"Error during task execution for agent '{agent_name}' (attempt {attempt+1}): {str(e)}", exc_info=True)
438
- attempt += 1
439
- # --- End validation and retry loop ---
440
-
441
- if validated_answer:
442
- formatted_response = f"**{agent_name}**: {validated_answer}"
443
- responses.append(formatted_response)
444
- logging.info(f"Validated response from agent '{agent_name}' added to responses")
445
- else:
446
- fallback_response = f"**PreData Moderator**: Unable to pass validation after {max_attempts} attempts for {agent_name}."
447
- responses.append(fallback_response)
448
- logging.warning(f"No validated output from agent '{agent_name}' after {max_attempts} attempts. Added fallback response.")
449
- logging.info(f"All responses generated: {responses}")
450
-
451
- if len(set(parsed_questions.values())) == 1:
452
- combined_output = "\n\n".join(responses)
453
- return [combined_output]
454
- else:
455
- return responses
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
 
209
def ask_interview_question(respondent_agents_dict, last_active_agent, question, processor_llm):
    """Handle one moderator question addressed to a single respondent agent.

    Pipeline: parse the free-text question with the processor LLM, validate
    its topic scope, then run the three-step answer flow (generate raw
    answer, stylise to the respondent's profile, final sanity check).

    Args:
        respondent_agents_dict: Mapping of respondent name -> agent wrapper
            exposing ``get_agent()`` and ``get_user_profile()``.
        last_active_agent: Previously active respondent(s). NOTE(review):
            rebound locally below but never returned — the tracking never
            reaches the caller; confirm whether it should be returned.
        question: Raw moderator question text.
        processor_llm: LLM client used for parsing / validation / stylising.

    Returns:
        List of formatted response strings — either moderator notices or
        ``**<name>**: <answer>`` entries.
    """
    logging.info(f"Received question: {question}")

    agent_names = list(respondent_agents_dict.keys())

    # Step 1: Parse and validate questions
    parsed_questions = parse_question_with_llm(question, str(agent_names), processor_llm)
    if not parsed_questions:
        return ["**PreData Moderator**: No valid respondents were detected for this question."]

    validated_questions = validate_question_topics(parsed_questions, processor_llm)
    for resp, q in validated_questions.items():
        # A single out-of-scope sub-question invalidates the whole turn.
        if q == "INVALID":
            return ["**PreData Moderator**: The question is invalid. Please ask another question."]

    # Use the validated (British-English) question texts from here on.
    parsed_questions = validated_questions

    if len(parsed_questions) > 1:
        return ["**PreData Moderator**: Please ask each respondent one question at a time."]

    # NOTE(review): local-only rebinding; see docstring.
    last_active_agent = list(parsed_questions.keys())

    responses = []
    for agent_name, agent_question in parsed_questions.items():
        agent_entry = respondent_agents_dict.get(agent_name)
        if not agent_entry:
            responses.append(f"**PreData Moderator**: {agent_name} is not a valid respondent.")
            continue

        # === Step 1: Generate raw answer ===
        raw_answer = generate_generic_answer(agent_name, agent_question, agent_entry.get_agent())

        # === Step 2: Stylise answer ===
        styled_answer = stylise_answer_to_profile(
            raw_answer,
            agent_name,
            agent_entry.get_user_profile(),
            processor_llm
        )

        # === Step 3: Final validation ===
        if not validate_final_answer(styled_answer):
            # Fixed: was an f-string with no placeholders.
            responses.append("**PreData Moderator**: The answer could not be validated.")
            continue

        responses.append(f"**{agent_name}**: {styled_answer}")

    return responses
255
+
256
+
257
# === STEP 1: GENERATE RAW ANSWER ===
def generate_generic_answer(agent_name, question, agent):
    """Produce an unstyled, first-person answer from *agent* for *question*.

    Runs a single-task Crew synchronously and returns the task's raw output,
    stripped of surrounding whitespace.

    Args:
        agent_name: Display name injected into the prompt persona.
        question: The validated question text.
        agent: CrewAI agent object for this respondent.

    Returns:
        The agent's answer text, stripped.
    """
    prompt = f"""
    You are {agent_name}. Answer the following question naturally and authentically in first person.
    Use British English. Do not apply any tone or formatting rules.

    ### Question:
    "{question}"
    """
    task = Task(description=prompt, expected_output="", agent=agent)
    Crew(agents=[agent], tasks=[task], process=Process.sequential).kickoff()
    # Defensive access: not every CrewAI version exposes `.raw` on the task
    # output — mirror the hasattr guard this module used previously instead
    # of assuming the attribute exists.
    output = task.output
    text = output.raw if hasattr(output, "raw") else str(output)
    return text.strip()
269
+
270
+
271
# === STEP 2: STYLISE ANSWER TO PROFILE ===
def stylise_answer_to_profile(raw_answer, agent_name, user_profile, processor_llm):
    """Rewrite *raw_answer* in the respondent's own communication style.

    Reads the Communication/Style field from the profile, falling back to a
    "conversational" tone when the field is missing or empty, then asks the
    processor LLM for a meaning-preserving rephrase in British English.

    Returns:
        The stylised answer text, stripped.
    """
    tone = user_profile.get_field("Communication", "Style")
    if not tone:
        # Profile has no style entry — use a neutral default.
        tone = "conversational"
    rewrite_request = f"""
    Rephrase the following response into a {tone} tone using British English.
    Keep it in first person. Do not change the meaning or add new content.

    ### Original:
    "{raw_answer}"
    """
    llm_reply = processor_llm.invoke(rewrite_request)
    return llm_reply.content.strip()
283
+
284
+
285
# === STEP 3: FINAL OUTPUT VALIDATION ===
def validate_final_answer(answer):
    """Cheap sanity check: the answer exists and is longer than two words.

    NOTE(review): the >2-word threshold may reject legitimate brief answers
    (e.g. "I'm 25.") — confirm against the interview brevity requirements.
    """
    if not answer:
        return False
    return len(answer.split()) > 2
288
+
289
+
290
# === PARSE QUESTIONS WITH LLM (Your existing code or external import) ===
def parse_question_with_llm(question, respondent_names, processor_llm):
    """Split *question* into per-respondent sub-questions via the LLM.

    Args:
        question: Raw moderator input.
        respondent_names: String listing the valid respondent names.
            NOTE(review): the prompt below never interpolates this value —
            confirm whether it was meant to be included.
        processor_llm: LLM client exposing ``.invoke(prompt)`` returning an
            object with a ``.content`` string.

    Returns:
        Dict mapping respondent name ("General"/"All" permitted) to the
        extracted question text; empty dict when nothing parses.
    """
    prompt = f"""
    You are an expert in market research interview analysis.
    Your task is to identify respondents mentioned in the question and extract the exact question posed to them.

    ### User Input:
    {question}

    ### Instructions:
    1. Identify each respondent being addressed.
    2. Extract the exact question posed to them.
    3. Use "General" if no specific name is mentioned. Use "All" if it's for everyone.
    4. If the question is out of scope, return "INVALID" as the question.

    ### Format:
    - Respondent: <Respondent Name>
    Question: <Extracted Question>
    """
    response = processor_llm.invoke(prompt)
    chatgpt_output = response.content.strip()

    parsed_questions = {}
    respondent_name = "General"

    for line in chatgpt_output.split("\n"):
        if "- Respondent:" in line:
            # maxsplit=1 so names containing ':' are not truncated.
            respondent_name = line.split(":", 1)[1].strip()
        elif "Question:" in line:
            # Bug fix: split(":") truncated questions containing a colon
            # (e.g. "Favourite film: why?"); maxsplit=1 keeps the full text.
            question_text = line.split(":", 1)[1].strip()
            if question_text:
                parsed_questions[respondent_name] = question_text

    return parsed_questions
326
+
327
+
328
# === VALIDATE QUESTIONS FOR TOPIC SCOPE (Your existing logic) ===
def validate_question_topics(parsed_questions, processor_llm):
    """Screen each parsed question for topic scope via the processor LLM.

    Returns a dict with the same keys as *parsed_questions*; each value is
    either the British-English version of the question or the literal
    string "INVALID".
    """
    def _screen(question):
        # One LLM round-trip per question; the model replies with either
        # the converted question or exactly "INVALID".
        prompt = f"""
    You are a research analyst. Validate whether the question is in the allowed topic scope and convert it to British English.

    ### Question:
    {question}

    ### If invalid:
    Return exactly "INVALID"

    ### Permitted Topics:
    - Demographics
    - Values & Beliefs
    - Career & Aspirations
    - Influences
    - Interests & Hobbies
    - Health & Lifestyle
    - Social Media & Tech
    - Personal Relationships
    - Future Outlook
    - Social & Societal Issues
    - Lifestyle Preferences
    - Personal Growth

    ### Output:
    """
        return processor_llm.invoke(prompt).content.strip()

    return {name: _screen(text) for name, text in parsed_questions.items()}