Spaces:

nat232
/

student_sample_panel

Build error

App Files Community

scormon-predata-ai committed on Jul 16, 2025

Commit

7e4b656

verified ·

1 Parent(s): a8669ab

Update common/ResponseValidation.py

Browse files

Files changed (1) hide show

common/ResponseValidation.py +78 -80

common/ResponseValidation.py CHANGED Viewed

@@ -5,13 +5,7 @@ import re
 from RespondentAgent import *
 from langchain_groq import ChatGroq
-def matches_user_speaking_style(answer, processor_llm, user_profile, agent_question, return_explanation=False):
-    """
-    Uses the LLM to determine if the answer matches the expected tone and style
-    based on the user's communication profile.
-    Returns (True, None) if it is first-person and stylistically aligned, (False, explanation) otherwise if return_explanation=True.
-    """
     logging.info("[Style Match Check] Entry")
     try:
@@ -23,46 +17,71 @@ def matches_user_speaking_style(answer, processor_llm, user_profile, agent_quest
         lower_q = agent_question.strip().lower()
         is_factual = any(kw in lower_q for kw in factual_keywords)
         if is_factual:
-            logging.info("[Style Match Check] Question is factual — skipping strict first-person enforcement")
             if return_explanation:
                 return True, None
             return True
-        # --- Step 2: Context-sensitive first-person check ---
-        fp_prompt = f"""
-You are an expert in analysing writing style and narrative perspective.
-Determine whether the following response is *stylistically appropriate* and matches a first-person perspective *when contextually expected*.
-- A first-person response typically includes pronouns like "I", "me", "my", "mine", "we", "our", or "us".
-- However, for **short factual responses** (e.g. "What's your name?" → "Alex") or answers that clearly imply personal ownership or involvement (e.g. "Our team led the project"), the absence of explicit first-person pronouns can still be acceptable.
-- The key question is: **Given the question and expected tone, is the response appropriately personal and aligned with a first-person speaking style?**
-Evaluate the response below.
 ### Question:
 {agent_question}
 ### Response:
 {answer}
-Return:
 First Person: Yes
 or
 First Person: No
 Reason: <short explanation>
 """
-        fp_response = processor_llm.invoke(fp_prompt)
-        fp_result = fp_response.content.strip().lower()
-        if "first person: no" in fp_result:
-            explanation = fp_result.split("reason:", 1)[-1].strip().capitalize() if "reason:" in fp_result else "The answer is not in first person."
-            logging.warning(f"[Style Match Check] Failed first-person test: {explanation}")
-            if return_explanation:
-                return False, explanation
-            return False
-        # --- Step 3: Extract user communication profile ---
         style = user_profile.get_field("Communication", "Style")
         tone = user_profile.get_field("Communication", "Tone")
         length = user_profile.get_field("Communication", "Length")
         topics = user_profile.get_field("Communication", "Topics")
-        # --- Step 4: Style validation prompt ---
         style_check_prompt = f"""
 You are a communication coach and writing style analyst.
 Evaluate how well the following response aligns with the given communication profile.
@@ -76,66 +95,41 @@ Evaluate how well the following response aligns with the given communication pro
 - Common Topics: {topics}
 ---
 ### Instructions:
-Assess how well the response aligns with the communication profile.
-- Allow for natural variation and expressive differences.
-- If the tone and structure mostly match, even if not perfect, that’s acceptable.
-- Only return “Style Match: No” if the response clearly *conflicts* with the profile (e.g., too formal, too short, too robotic).
-Respond only with one of:
 - Style Match: Yes
 - Style Match: Mostly
 - Style Match: No
 """
-        logging.info("[Style Match Check] Invoking LLM for profile-based style check")
         style_response = processor_llm.invoke(style_check_prompt)
         style_result = style_response.content.strip().lower()
         if "style match: yes" in style_result or "style match: mostly" in style_result:
-            logging.info("[Style Match Check] Match confirmed (or mostly matched)")
-            if return_explanation:
-                return True, None
-            return True
-        elif "style match: no" in style_result:
-            if "first person: yes" in fp_result:
-                logging.info("[Style Match Check] Potential false negative: First-person check passed but style rejected")
-            # --- Ask LLM for explanation on mismatch ---
             explanation_prompt = f"""
-You are a communication coach and writing style analyst.
-The following response was evaluated as NOT matching the given communication profile.
-Please provide a concise reason why the style does not match.
 ---
-### Response:
-{answer}
-### Communication Profile:
-- Style: {style}
-- Tone: {tone}
-- Preferred Length: {length}
-- Common Topics: {topics}
----
-### Please provide a short reason for style mismatch:
 """
             explanation_response = processor_llm.invoke(explanation_prompt)
             explanation = explanation_response.content.strip()
-            logging.info(f"[Style Match Check] Style mismatch detected: {explanation}")
-            if return_explanation:
-                return False, explanation
-            return False
-        else:
-            logging.warning(f"[Style Match Check] Unexpected output format: {style_result}")
-            if return_explanation:
-                return False, f"Unexpected output format: {style_result}"
-            return False
     except Exception as e:
-        logging.error(f"[Style Match Check] Exception occurred: {e}")
-        if return_explanation:
-            return False, str(e)
-        return False
 def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm, return_explanation=False):
     llm_mode_prompt = f"""
@@ -168,13 +162,20 @@ You are a market research evaluator. Given the following:
 - Respondent Type: {respondent_type}
 - Question: {question}
 - Answer: {answer}
 Rate the answer on a scale of 0–10 for:
-1. **Plausibility** – Is the response reasonable and believable given the user’s background?
-2. **Relevance** – Does the answer directly and completely address the question?
-Ignore writing style, grammar, tone, emotional depth, or expressiveness. Focus purely on content quality and alignment with the question.
 Output strictly in this format:
 Plausibility Rating: <0-10>
 Relevance Rating: <0-10>
@@ -228,12 +229,9 @@ You are a market research evaluator. Given the following:
 - Respondent Type: {respondent_type}
 - Question: {question}
 - Answer: {answer}
 Rate the answer on a scale of 0–10 for:
 1. **Accuracy** – Does the content align with the user’s facts or transcript, without fabrications?
 Ignore tone, phrasing, or style. Focus only on factual correctness.
 Output strictly in this format:
 Accuracy Rating: <0-10>
 If the rating is less than 8, provide a short reason below:
@@ -261,4 +259,4 @@ Accuracy Reason: <reason>
             return valid
         if return_explanation:
             return False, "Could not parse accuracy rating."
-        return False

 from RespondentAgent import *
 from langchain_groq import ChatGroq
+def matches_user_speaking_style(answer, processor_llm, user_profile, agent_question, respondent_type="INDIVIDUAL", return_explanation=False):
     logging.info("[Style Match Check] Entry")
     try:
         lower_q = agent_question.strip().lower()
         is_factual = any(kw in lower_q for kw in factual_keywords)
         if is_factual:
+            logging.info("[Style Match Check] Question is factual — skipping strict style enforcement")
             if return_explanation:
                 return True, None
             return True
+        # --- Step 2: First-person or collective pronoun check ---
+        logging.info(f"[Style Match Check] Performing {'collective' if respondent_type == 'FOCUS GROUP' else 'first-person'} pronoun check")
+        if respondent_type == "FOCUS GROUP":
+            pronoun_prompt = f"""
+You are an expert in writing style analysis.
+Determine whether the following response is appropriate for a **focus group**, which must:
+- Use collective language ("we", "our", "us", "some of us", "most participants")
+- Avoid any first-person singular language ("I", "me", "my", etc.)
+- Speak as a group, not as an individual
+Check the response below and answer in the following format:
+Focus Group Style: Yes
+or
+Focus Group Style: No
+Reason: <short reason>
+---
 ### Question:
 {agent_question}
 ### Response:
 {answer}
+"""
+            response = processor_llm.invoke(pronoun_prompt)
+            result = response.content.strip().lower()
+            if "focus group style: no" in result:
+                explanation = result.split("reason:", 1)[-1].strip().capitalize() if "reason:" in result else "The response does not follow focus group voice."
+                logging.warning(f"[Style Match Check] Failed group tone: {explanation}")
+                return (False, explanation) if return_explanation else False
+        else:
+            # INDIVIDUAL — use first-person pronoun validation
+            fp_prompt = f"""
+You are an expert in writing style analysis.
+Determine whether the following response uses a personal **first-person** tone, appropriate for an individual.
+- Look for use of "I", "me", "my", "mine", or implied personal ownership.
+- Skip judgment on content quality or grammar — just the perspective.
+Respond using this format:
 First Person: Yes
 or
 First Person: No
 Reason: <short explanation>
+---
+### Question:
+{agent_question}
+### Response:
+{answer}
 """
+            fp_response = processor_llm.invoke(fp_prompt)
+            fp_result = fp_response.content.strip().lower()
+            if "first person: no" in fp_result:
+                explanation = fp_result.split("reason:", 1)[-1].strip().capitalize() if "reason:" in fp_result else "The answer is not in first person."
+                logging.warning(f"[Style Match Check] Failed first-person test: {explanation}")
+                return (False, explanation) if return_explanation else False
+        # --- Step 3: Communication style match ---
         style = user_profile.get_field("Communication", "Style")
         tone = user_profile.get_field("Communication", "Tone")
         length = user_profile.get_field("Communication", "Length")
         topics = user_profile.get_field("Communication", "Topics")
         style_check_prompt = f"""
 You are a communication coach and writing style analyst.
 Evaluate how well the following response aligns with the given communication profile.
 - Common Topics: {topics}
 ---
 ### Instructions:
+Assess whether the response reflects the user's typical communication style.
+Respond with only one of:
 - Style Match: Yes
 - Style Match: Mostly
 - Style Match: No
 """
         style_response = processor_llm.invoke(style_check_prompt)
         style_result = style_response.content.strip().lower()
         if "style match: yes" in style_result or "style match: mostly" in style_result:
+            return (True, None) if return_explanation else True
+        if "style match: no" in style_result:
             explanation_prompt = f"""
+You are a communication coach.
+The following response was judged as **not matching** the profile. Briefly explain why.
 ---
+Response: {answer}
+Style: {style}
+Tone: {tone}
+Length: {length}
+Topics: {topics}
 """
             explanation_response = processor_llm.invoke(explanation_prompt)
             explanation = explanation_response.content.strip()
+            logging.warning(f"[Style Match Check] Style mismatch explanation: {explanation}")
+            return (False, explanation) if return_explanation else False
+        # Fallback
+        logging.warning(f"[Style Match Check] Unclear result format: {style_result}")
+        return (False, f"Unexpected format: {style_result}") if return_explanation else False
     except Exception as e:
+        logging.error(f"[Style Match Check] Exception: {e}")
+        return (False, str(e)) if return_explanation else False
 def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm, return_explanation=False):
     llm_mode_prompt = f"""
 - Respondent Type: {respondent_type}
 - Question: {question}
 - Answer: {answer}
 Rate the answer on a scale of 0–10 for:
+1. **Plausibility** – Does the response make sense given what is known about the respondent?
+   - Consider the respondent’s background, demographics, stated preferences, life stage, interests, and prior responses.
+   - Is the answer **internally consistent** and **realistic** for someone like this respondent?
+   - Does it feel like something a person in their position would genuinely say or experience?
+   - Avoid penalising for style — focus purely on whether the answer is believable and fits the persona.
+   - A low plausibility score indicates the answer seems fabricated, out of character, contradictory, or implausible for this individual or group.
+2. **Relevance** – Does the answer directly and fully address the specific question asked?
+   - Check whether the response clearly **answers the intent of the question** without deflection or vagueness.
+   - Consider whether it provides a complete and meaningful response — not just a surface-level or partial reply.
+   - Does the answer stay **on-topic** and reflect the subject matter or framing of the original prompt?
+   - A low relevance score means the answer is off-topic, evasive, only loosely related, or ignores key elements of the question.
+Ignore tone, emotional expression, writing style, grammar, or British/American English differences.
+Focus **strictly** on the **content quality**, **truthfulness**, and **alignment with the question and user profile**.
 Output strictly in this format:
 Plausibility Rating: <0-10>
 Relevance Rating: <0-10>
 - Respondent Type: {respondent_type}
 - Question: {question}
 - Answer: {answer}
 Rate the answer on a scale of 0–10 for:
 1. **Accuracy** – Does the content align with the user’s facts or transcript, without fabrications?
 Ignore tone, phrasing, or style. Focus only on factual correctness.
 Output strictly in this format:
 Accuracy Rating: <0-10>
 If the rating is less than 8, provide a short reason below:
             return valid
         if return_explanation:
             return False, "Could not parse accuracy rating."
+        return False