mikaelmp committed on
Commit
fbc8125
·
verified ·
1 Parent(s): 1337d24

Splitting trial

Browse files
researchsimulation/InteractiveInterviewChatbot.py CHANGED
@@ -207,249 +207,153 @@ def validate_question_topics(parsed_questions, processor_llm):
207
 
208
 
209
  def ask_interview_question(respondent_agents_dict, last_active_agent, question, processor_llm):
210
- """
211
- Handles both individual and group interview questions while tracking conversation flow.
212
- Uses OpenAI's LLM to extract the intended respondent(s) and their specific question(s).
213
- Uses Groq's LLM for response generation.
214
- """
215
-
216
- logging.info(f"START: Processing new interview question: {question}")
217
- responses = []
218
-
219
  agent_names = list(respondent_agents_dict.keys())
220
- logging.info(f"Available respondents: {agent_names}")
221
- print(f"Available respondents: {agent_names}")
222
 
223
- # Use OpenAI LLM to parse questions into individual respondent-specific sub-questions and validate them
224
-
225
- # Step 1: Parse question
226
- logging.info("STEP 1: Parsing question with LLM...")
227
  parsed_questions = parse_question_with_llm(question, str(agent_names), processor_llm)
228
- logging.info(f"Parsed Questions Output: {parsed_questions}")
229
-
230
  if not parsed_questions:
231
- logging.warning("No questions were parsed from input.")
232
  return ["**PreData Moderator**: No valid respondents were detected for this question."]
233
 
234
- # Step 2: Validate question content (scope + spelling)
235
- logging.info("STEP 2: Validating questions for topic relevance and British English...")
236
  validated_questions = validate_question_topics(parsed_questions, processor_llm)
237
- logging.info(f"Validated Questions: {validated_questions}")
238
-
239
- for resp_name, extracted_question in validated_questions.items():
240
- if extracted_question == "INVALID":
241
- logging.warning(f"Invalid question detected for {resp_name}: {extracted_question}")
242
  return ["**PreData Moderator**: The question is invalid. Please ask another question."]
243
-
244
- # Use validated questions from this point on
245
  parsed_questions = validated_questions
246
- logging.info(f"Validated questions: {parsed_questions}")
247
-
248
- if len(parsed_questions) > 1:
249
- logging.warning("More than one respondent specified. Exiting function.")
250
- return "**PreData Moderator**: Please ask each respondent one question at a time."
251
- else:
252
- print(f"Parsed questions are: {parsed_questions}")
253
-
254
- if "General" in parsed_questions:
255
- if "General" in parsed_questions:
256
- if isinstance(last_active_agent, list) and all(name in agent_names for name in last_active_agent):
257
- logging.info(f"General case detected. Continuing with last active agent: {last_active_agent}")
258
- parsed_questions = {name: parsed_questions["General"] for name in last_active_agent}
259
- else:
260
- logging.info("General case detected without a valid previous active agent. Assigning question to all respondents.")
261
- parsed_questions = {name: parsed_questions["General"] for name in agent_names}
262
- elif "All" in parsed_questions:
263
- logging.info("All case detected. Assigning question to all respondents.")
264
- validated_question = parsed_questions["All"]
265
- parsed_questions = {name: validated_question for name in agent_names}
266
-
267
-
268
 
 
 
269
 
270
  last_active_agent = list(parsed_questions.keys())
271
- logging.info(f"Final parsed questions: {parsed_questions}")
272
-
273
- # Construct one crew and task for each agent and question
274
  responses = []
275
 
276
  for agent_name, agent_question in parsed_questions.items():
277
- if agent_name not in respondent_agents_dict:
278
- logging.warning(f"No valid respondent found for {agent_name}. Skipping.")
279
  responses.append(f"**PreData Moderator**: {agent_name} is not a valid respondent.")
280
  continue
281
 
282
- respondent_agent = respondent_agents_dict[agent_name].get_agent()
283
- user_profile = respondent_agents_dict[agent_name].get_user_profile()
284
-
285
- # communication_style = user_profile.get_field("Communication", "Style")
286
- communication_style = ""
287
-
288
- question_task_description = f"""
289
- You are {agent_name}. You are responding to a market research interview question. Your response must strictly follow the *style and tone* and *Hard Rules – You Must Follow These Without Exception* outlined below.
290
- ---
291
- ### *Communication Profile Reference:*
292
- - **Style:** {user_profile.get_field('Communication', 'Style')}
293
- - **Tone:** {user_profile.get_field('Communication', 'Tone')}
294
- - **Length:** {user_profile.get_field('Communication', 'Length')}
295
- - **Topics:** {user_profile.get_field('Communication', 'Topics')}
296
- ---
297
- ---
298
- ### 🔒 **Hard Rules – You Must Follow These Without Exception**
299
- - You must answer **only the question(s)** that are **explicitly asked**.
300
- - **Never provide extra information** beyond what was asked.
301
- - Keep your response **as short as possible** while still sounding natural and complete.
302
- - Do **not infer or assume** what the user *might* want — only respond to what they *actually* asked.
303
- - If multiple questions are asked, respond to **each one briefly**, and **nothing else**.
304
- - If the question is vague, respond minimally and only within that scope.
305
- -Give concise answers, whether the question is asked to the group or individually.
306
- -For factual or demographic questions (e.g., age, gender, location, housing), keep responses brief and to the point, without extra commentary.
307
- -Do not add any explanations, opinions, or additional information.
308
- -Use simple, clear sentences.
309
- -Example:
310
- Q: Where are you from?
311
- A: I’m from [city], [country](DO NOT ADD ANY EXTRA COMMENTS).
312
- -For reflective or opinion-based questions (e.g., feelings, preferences, motivations), provide thoughtful but still clear and focused answers.
313
- -Never repeat the question or add unrelated background information.
314
- ---
315
- ### **How to Answer:**
316
- - Your response should be **natural, authentic, and fully aligned** with the specified style and tone.
317
- - Ensure the answer is **clear, engaging, and directly relevant** to the question.
318
- - Adapt your **sentence structure, phrasing, and word choices** to match the intended communication style.
319
- - If applicable, incorporate **culturally relevant expressions, regional nuances, or industry-specific terminology** that fit the given tone.
320
- - **Adjust response length** based on the tone—**concise and direct** for casual styles, **structured and detailed** for professional styles.
321
- - **Always answer in first person ("I", "my", "me", "mine", etc.) as if you are personally responding to the question. You are an individual representing yourself, not speaking in third person.**
322
- -Always answer as if you are the individual being directly spoken to. Use first-person language such as “I,” “me,” “my,” and “mine” in every response. Imagine you are having a real conversation — your tone should feel natural, personal, and authentic. Do not refer to yourself in the third person (e.g., “She is from Trichy” or “Meena likes…”). Avoid describing yourself as if someone else is talking about you.
323
- -Everything you say should come from your own perspective, just like you would in everyday speech. The goal is to sound human, relatable, and direct — like you're truly present in the conversation.
324
- ---
325
- ### **Guidelines for Ensuring Authenticity & Alignment:**
326
- - **Consistency**: Maintain the same tone throughout the response.
327
- - **Authenticity**: The response should feel natural and match the speaker’s persona.
328
- - **Avoid Overgeneralisation**: Ensure responses are specific and not overly generic or robotic.
329
- - **Cultural & Linguistic Relevance**: Adapt language and references to match the speaker’s background, industry, or region where appropriate.
330
- - **Strict British Spelling & Grammar**:
331
- - All responses must use correct British English spelling, grammar, and usage, **irrespective of how the question is phrased**.
332
- - You must not mirror any American spelling, terminology, or phrasing found in the input question.
333
- - Where there are regional variations (e.g. 'licence' vs 'license', 'programme' vs 'program', 'aeroplane' vs 'airplane'), always default to the standard British form.
334
- - Examples:
335
- - **Correct (British):** organised, prioritise, minimise, realise, behaviour, centre, defence, travelling, practise (verb), licence (noun), programme, aeroplane.
336
- - **Incorrect (American):** organized, prioritize, minimize, realize, behavior, center, defense, traveling, practice (verb and noun), license (noun), program, airplane.
337
- - **Formatting**:
338
- - If the tone is informal, allow a conversational flow that mirrors natural speech.
339
- - If the tone is formal, use a structured and professional format.
340
- - **Do not include emojis or hashtags in the response.**
341
- - Maintain **narrative and thematic consistency** across all answers to simulate a coherent personality.
342
- -**Personality Profile Alignment:**
343
- -Consider your assigned personality traits across these dimensions:
344
- -Big Five Traits:
345
- -Openness: Reflect your level of curiosity, creativity, and openness to new experiences
346
- -Conscientiousness: Show your degree of organization, responsibility, and planning
347
- -Extraversion: Express your sociability and energy level in interactions
348
- -Agreeableness: Demonstrate your warmth, cooperation, and consideration for others
349
- -Neuroticism: Consider your emotional stability and stress response
350
- -Values and Priorities:
351
- -Achievement Orientation: Show your drive for success and goal-setting approach
352
- -Risk Tolerance: Express your comfort with uncertainty and change
353
- -Traditional Values: Reflect your adherence to conventional norms and practices
354
- -Communication Style:
355
- -Detail Orientation: Demonstrate your preference for specific vs. general information
356
- -Complexity: Show your comfort with nuanced vs. straightforward explanations
357
- -Directness: Express your communication as either straightforward or diplomatic
358
- -Emotional Expressiveness: Reflect your tendency to share or withhold emotions
359
- -Your responses must consistently align with these personality traits from your profile.
360
- ---
361
- ### **Example Responses (for Different Styles & Tones)**
362
- #### **Casual & Conversational Tone**
363
- **Question:** "How do you stay updated on the latest fashion and tech trends?"
364
- **Correct Response:**
365
- "I keep up with trends by following influencers on Instagram and watching product reviews on YouTube. Brands like Noise and Boat always drop stylish, affordable options, so I make sure to stay ahead of the curve."
366
- #### **Formal & Professional Tone**
367
- **Question:** "How do you stay updated on the latest fashion and tech trends?"
368
- **Correct Response:**
369
- "I actively follow industry trends by reading reports, attending webinars, and engaging with thought leaders on LinkedIn. I also keep up with global fashion and technology updates through leading publications such as *The Business of Fashion* and *TechCrunch*."
370
- ---
371
- Your final answer should be **a well-structured response that directly answers the question while maintaining the specified style and tone**:
372
- **"{agent_question}"**
373
- """
374
 
375
- question_task_expected_output = f"""
376
- A culturally authentic and conversational response to the question: '{agent_question}'.
377
- - The response must reflect the respondent's **local cultural background and geographic influences**, ensuring it aligns with their **speech patterns, preferences, and linguistic style**.
378
- - The language must follow **strict British English spelling conventions**, ensuring it is **natural, personal, and free-flowing**, while strictly avoiding American spelling, phrasing, or grammar under any circumstances, regardless of the spelling, grammar, or vocabulary used in the input question.
379
- - The response **must not introduce the respondent**, nor include placeholders like "[Your Name]" or "[Brand Name]".
380
- - The response **must always be written in first person ("I", "my", "me", etc.) as if the respondent is personally answering the question directly. Third-person narration is never allowed.**
381
- - The final output should be a **single, well-structured paragraph that directly answers the question** while staying fully aligned with the specified communication style.
382
- """
383
-
384
- question_task = Task(
385
- description=question_task_description,
386
- expected_output=question_task_expected_output,
387
- agent=respondent_agent
388
- )
389
-
390
- logging.debug(f"Created task for agent '{agent_name}' with description: {question_task_description}")
391
-
392
- # Log before starting task execution
393
- logging.info(f"Executing task for agent '{agent_name}'")
394
-
395
- # Create a new crew for each agent-question pair
396
- crew = Crew(
397
- agents=[respondent_agent],
398
- tasks=[question_task],
399
- process=Process.sequential
400
  )
401
- logging.debug(f"Crew initialized for agent '{agent_name}' with 1 task and sequential process")
402
-
403
- max_attempts = 3
404
- attempt = 0
405
- validated = False
406
- validated_answer = None
407
- while attempt < max_attempts and not validated:
408
- try:
409
- logging.info(f"Starting Response validation attempt {attempt+1} for agent '{agent_name}'")
410
- crew_output = crew.kickoff()
411
- logging.info(f"Task execution completed for agent '{agent_name}' (attempt {attempt+1})")
412
- task_output = question_task.output
413
- logging.debug(f"Raw output from agent '{agent_name}': {getattr(task_output, 'raw', str(task_output))}")
414
- answer = task_output.raw if hasattr(task_output, 'raw') else str(task_output)
415
- logging.info(f"Validating response for agent '{agent_name}' (attempt {attempt+1}): {answer}")
416
- # Validate the response using validate_response from validation_utils
417
- is_valid = validate_response(
418
- question=agent_question,
419
- answer=answer,
420
- user_profile_str=str(user_profile),
421
- fast_facts_str="",
422
- interview_transcript_text="",
423
- respondent_type=agent_name,
424
- ai_evaluator_agent=None,
425
- processor_llm=processor_llm
426
- )
427
- logging.info(f"Response Validation result for agent '{agent_name}' (attempt {attempt+1}): {is_valid}")
428
- if is_valid:
429
- validated = True
430
- validated_answer = answer
431
- logging.info(f"Response for agent '{agent_name}' passed validation on attempt {attempt+1}")
432
- break
433
- else:
434
- attempt += 1
435
- logging.warning(f"Response failed response validation for agent '{agent_name}' (attempt {attempt}). Retrying...")
436
- except Exception as e:
437
- logging.error(f"Error during task execution for agent '{agent_name}' (attempt {attempt+1}): {str(e)}", exc_info=True)
438
- attempt += 1
439
- # --- End validation and retry loop ---
440
-
441
- if validated_answer:
442
- formatted_response = f"**{agent_name}**: {validated_answer}"
443
- responses.append(formatted_response)
444
- logging.info(f"Validated response from agent '{agent_name}' added to responses")
445
- else:
446
- fallback_response = f"**PreData Moderator**: Unable to pass validation after {max_attempts} attempts for {agent_name}."
447
- responses.append(fallback_response)
448
- logging.warning(f"No validated output from agent '{agent_name}' after {max_attempts} attempts. Added fallback response.")
449
- logging.info(f"All responses generated: {responses}")
450
-
451
- if len(set(parsed_questions.values())) == 1:
452
- combined_output = "\n\n".join(responses)
453
- return [combined_output]
454
- else:
455
- return responses
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
 
209
def ask_interview_question(respondent_agents_dict, last_active_agent, question, processor_llm):
    """Handle one moderator question addressed to a single respondent agent.

    Pipeline: parse the free-text question with the processor LLM, validate
    its topic scope, then run the three-step answer flow (generate raw
    answer, stylise to the respondent's profile, final sanity check).

    Args:
        respondent_agents_dict: Mapping of respondent name -> agent wrapper
            exposing ``get_agent()`` and ``get_user_profile()``.
        last_active_agent: Previously active respondent(s). NOTE(review):
            rebound locally below but never returned — the tracking never
            reaches the caller; confirm whether it should be returned.
        question: Raw moderator question text.
        processor_llm: LLM client used for parsing / validation / stylising.

    Returns:
        List of formatted response strings — either moderator notices or
        ``**<name>**: <answer>`` entries.
    """
    logging.info(f"Received question: {question}")

    agent_names = list(respondent_agents_dict.keys())

    # Step 1: Parse and validate questions
    parsed_questions = parse_question_with_llm(question, str(agent_names), processor_llm)
    if not parsed_questions:
        return ["**PreData Moderator**: No valid respondents were detected for this question."]

    validated_questions = validate_question_topics(parsed_questions, processor_llm)
    for resp, q in validated_questions.items():
        # A single out-of-scope sub-question invalidates the whole turn.
        if q == "INVALID":
            return ["**PreData Moderator**: The question is invalid. Please ask another question."]

    # Use the validated (British-English) question texts from here on.
    parsed_questions = validated_questions

    if len(parsed_questions) > 1:
        return ["**PreData Moderator**: Please ask each respondent one question at a time."]

    # NOTE(review): local-only rebinding; see docstring.
    last_active_agent = list(parsed_questions.keys())

    responses = []
    for agent_name, agent_question in parsed_questions.items():
        agent_entry = respondent_agents_dict.get(agent_name)
        if not agent_entry:
            responses.append(f"**PreData Moderator**: {agent_name} is not a valid respondent.")
            continue

        # === Step 1: Generate raw answer ===
        raw_answer = generate_generic_answer(agent_name, agent_question, agent_entry.get_agent())

        # === Step 2: Stylise answer ===
        styled_answer = stylise_answer_to_profile(
            raw_answer,
            agent_name,
            agent_entry.get_user_profile(),
            processor_llm
        )

        # === Step 3: Final validation ===
        if not validate_final_answer(styled_answer):
            # Fixed: was an f-string with no placeholders.
            responses.append("**PreData Moderator**: The answer could not be validated.")
            continue

        responses.append(f"**{agent_name}**: {styled_answer}")

    return responses
255
+
256
+
257
# === STEP 1: GENERATE RAW ANSWER ===
def generate_generic_answer(agent_name, question, agent):
    """Produce an unstyled, first-person answer from *agent* for *question*.

    Runs a single-task Crew synchronously and returns the task's raw output,
    stripped of surrounding whitespace.

    Args:
        agent_name: Display name injected into the prompt persona.
        question: The validated question text.
        agent: CrewAI agent object for this respondent.

    Returns:
        The agent's answer text, stripped.
    """
    prompt = f"""
    You are {agent_name}. Answer the following question naturally and authentically in first person.
    Use British English. Do not apply any tone or formatting rules.

    ### Question:
    "{question}"
    """
    task = Task(description=prompt, expected_output="", agent=agent)
    Crew(agents=[agent], tasks=[task], process=Process.sequential).kickoff()
    # Defensive access: not every CrewAI version exposes `.raw` on the task
    # output — mirror the hasattr guard this module used previously instead
    # of assuming the attribute exists.
    output = task.output
    text = output.raw if hasattr(output, "raw") else str(output)
    return text.strip()
269
+
270
+
271
# === STEP 2: STYLISE ANSWER TO PROFILE ===
def stylise_answer_to_profile(raw_answer, agent_name, user_profile, processor_llm):
    """Rewrite *raw_answer* in the respondent's own communication style.

    Reads the Communication/Style field from the profile, falling back to a
    "conversational" tone when the field is missing or empty, then asks the
    processor LLM for a meaning-preserving rephrase in British English.

    Returns:
        The stylised answer text, stripped.
    """
    tone = user_profile.get_field("Communication", "Style")
    if not tone:
        # Profile has no style entry — use a neutral default.
        tone = "conversational"
    rewrite_request = f"""
    Rephrase the following response into a {tone} tone using British English.
    Keep it in first person. Do not change the meaning or add new content.

    ### Original:
    "{raw_answer}"
    """
    llm_reply = processor_llm.invoke(rewrite_request)
    return llm_reply.content.strip()
283
+
284
+
285
# === STEP 3: FINAL OUTPUT VALIDATION ===
def validate_final_answer(answer):
    """Cheap sanity check: the answer exists and is longer than two words.

    NOTE(review): the >2-word threshold may reject legitimate brief answers
    (e.g. "I'm 25.") — confirm against the interview brevity requirements.
    """
    if not answer:
        return False
    return len(answer.split()) > 2
288
+
289
+
290
# === PARSE QUESTIONS WITH LLM (Your existing code or external import) ===
def parse_question_with_llm(question, respondent_names, processor_llm):
    """Split *question* into per-respondent sub-questions via the LLM.

    Args:
        question: Raw moderator input.
        respondent_names: String listing the valid respondent names.
            NOTE(review): the prompt below never interpolates this value —
            confirm whether it was meant to be included.
        processor_llm: LLM client exposing ``.invoke(prompt)`` returning an
            object with a ``.content`` string.

    Returns:
        Dict mapping respondent name ("General"/"All" permitted) to the
        extracted question text; empty dict when nothing parses.
    """
    prompt = f"""
    You are an expert in market research interview analysis.
    Your task is to identify respondents mentioned in the question and extract the exact question posed to them.

    ### User Input:
    {question}

    ### Instructions:
    1. Identify each respondent being addressed.
    2. Extract the exact question posed to them.
    3. Use "General" if no specific name is mentioned. Use "All" if it's for everyone.
    4. If the question is out of scope, return "INVALID" as the question.

    ### Format:
    - Respondent: <Respondent Name>
    Question: <Extracted Question>
    """
    response = processor_llm.invoke(prompt)
    chatgpt_output = response.content.strip()

    parsed_questions = {}
    respondent_name = "General"

    for line in chatgpt_output.split("\n"):
        if "- Respondent:" in line:
            # maxsplit=1 so names containing ':' are not truncated.
            respondent_name = line.split(":", 1)[1].strip()
        elif "Question:" in line:
            # Bug fix: split(":") truncated questions containing a colon
            # (e.g. "Favourite film: why?"); maxsplit=1 keeps the full text.
            question_text = line.split(":", 1)[1].strip()
            if question_text:
                parsed_questions[respondent_name] = question_text

    return parsed_questions
326
+
327
+
328
# === VALIDATE QUESTIONS FOR TOPIC SCOPE (Your existing logic) ===
def validate_question_topics(parsed_questions, processor_llm):
    """Screen each parsed question for topic scope via the processor LLM.

    Returns a dict with the same keys as *parsed_questions*; each value is
    either the British-English version of the question or the literal
    string "INVALID".
    """
    def _screen(question):
        # One LLM round-trip per question; the model replies with either
        # the converted question or exactly "INVALID".
        prompt = f"""
    You are a research analyst. Validate whether the question is in the allowed topic scope and convert it to British English.

    ### Question:
    {question}

    ### If invalid:
    Return exactly "INVALID"

    ### Permitted Topics:
    - Demographics
    - Values & Beliefs
    - Career & Aspirations
    - Influences
    - Interests & Hobbies
    - Health & Lifestyle
    - Social Media & Tech
    - Personal Relationships
    - Future Outlook
    - Social & Societal Issues
    - Lifestyle Preferences
    - Personal Growth

    ### Output:
    """
        return processor_llm.invoke(prompt).content.strip()

    return {name: _screen(text) for name, text in parsed_questions.items()}