# common/ResponseValidation.py
# (Removed Hugging Face file-viewer chrome — "raw / history / blame", file size,
#  and commit hash — that was pasted in above the module and broke the file.)
import logging
import gradio as gr
import re
from RespondentAgent import *
from langchain_groq import ChatGroq
def matches_user_speaking_style(answer, processor_llm, user_profile, agent_question, respondent_type="INDIVIDUAL", return_explanation=False):
    """Check whether *answer* fits the respondent's expected speaking style.

    Runs up to three LLM-backed checks:
      1. Skip entirely for factual questions (name, age, location, ...),
         which carry no stylistic signal.
      2. Voice check — collective voice ("we"/"us") for FOCUS GROUP,
         first-person voice ("I"/"my") for INDIVIDUAL.
      3. Match against the user's Communication profile
         (Style / Tone / Length / Topics fields).

    Args:
        answer: Candidate response text to validate.
        processor_llm: LLM client exposing ``.invoke(prompt)`` returning an
            object with a ``.content`` string (e.g. a LangChain chat model).
        user_profile: Profile object exposing ``.get_field(section, key)``.
            Only read in step 3.
        agent_question: The interviewer question the answer responds to.
        respondent_type: "INDIVIDUAL" or "FOCUS GROUP".
        return_explanation: When True, return ``(bool, explanation-or-None)``
            instead of a bare bool.

    Returns:
        bool, or ``(bool, str | None)`` when ``return_explanation`` is True.
        Any exception is caught, logged, and reported as a failed check.
    """
    logging.info("[Style Match Check] Entry")
    try:
        # --- Step 1: Skip style check for factual questions ---
        # Strict style enforcement on factual answers would only produce
        # false negatives, so short-circuit to "pass".
        factual_keywords = [
            "name", "age", "where are you from", "where do you live", "occupation",
            "birthplace", "what do you do", "how old", "which city", "which country"
        ]
        lower_q = agent_question.strip().lower()
        if any(kw in lower_q for kw in factual_keywords):
            logging.info("[Style Match Check] Question is factual — skipping strict style enforcement")
            return (True, None) if return_explanation else True

        # --- Step 2: First-person or collective pronoun check ---
        logging.info(f"[Style Match Check] Performing {'collective' if respondent_type == 'FOCUS GROUP' else 'first-person'} pronoun check")
        if respondent_type == "FOCUS GROUP":
            pronoun_prompt = f"""
You are an expert in writing style analysis.
Determine whether the following response is appropriate for a **focus group**, which must:
- Use collective language ("we", "our", "us", "some of us", "most participants")
- Avoid any first-person singular language ("I", "me", "my", etc.)
- Speak as a group, not as an individual
Check the response below and answer in the following format:
Focus Group Style: Yes
or
Focus Group Style: No
Reason: <short reason>
---
### Question:
{agent_question}
### Response:
{answer}
"""
            response = processor_llm.invoke(pronoun_prompt)
            result = response.content.strip().lower()
            if "focus group style: no" in result:
                # Surface the model's "Reason:" line when present.
                explanation = result.split("reason:", 1)[-1].strip().capitalize() if "reason:" in result else "The response does not follow focus group voice."
                logging.warning(f"[Style Match Check] Failed group tone: {explanation}")
                return (False, explanation) if return_explanation else False
        else:
            # INDIVIDUAL — use first-person pronoun validation
            fp_prompt = f"""
You are an expert in writing style analysis.
Determine whether the following response uses a personal **first-person** tone, appropriate for an individual.
- Look for use of "I", "me", "my", "mine", or implied personal ownership.
- Skip judgment on content quality or grammar — just the perspective.
Respond using this format:
First Person: Yes
or
First Person: No
Reason: <short explanation>
---
### Question:
{agent_question}
### Response:
{answer}
"""
            fp_response = processor_llm.invoke(fp_prompt)
            fp_result = fp_response.content.strip().lower()
            if "first person: no" in fp_result:
                explanation = fp_result.split("reason:", 1)[-1].strip().capitalize() if "reason:" in fp_result else "The answer is not in first person."
                logging.warning(f"[Style Match Check] Failed first-person test: {explanation}")
                return (False, explanation) if return_explanation else False

        # --- Step 3: Communication style match ---
        # Only reached when the voice check above passed (or returned "yes").
        style = user_profile.get_field("Communication", "Style")
        tone = user_profile.get_field("Communication", "Tone")
        length = user_profile.get_field("Communication", "Length")
        topics = user_profile.get_field("Communication", "Topics")
        style_check_prompt = f"""
You are a communication coach and writing style analyst.
Evaluate how well the following response aligns with the given communication profile.
---
### Response:
{answer}
### Communication Profile:
- Style: {style}
- Tone: {tone}
- Preferred Length: {length}
- Common Topics: {topics}
---
### Instructions:
Assess whether the response reflects the user's typical communication style.
Respond with only one of:
- Style Match: Yes
- Style Match: Mostly
- Style Match: No
"""
        style_response = processor_llm.invoke(style_check_prompt)
        style_result = style_response.content.strip().lower()
        if "style match: yes" in style_result or "style match: mostly" in style_result:
            return (True, None) if return_explanation else True
        if "style match: no" in style_result:
            # Second call: ask the model WHY it judged a mismatch, so the
            # caller can feed the explanation back upstream.
            explanation_prompt = f"""
You are a communication coach.
The following response was judged as **not matching** the profile. Briefly explain why.
---
Response: {answer}
Style: {style}
Tone: {tone}
Length: {length}
Topics: {topics}
"""
            explanation_response = processor_llm.invoke(explanation_prompt)
            explanation = explanation_response.content.strip()
            logging.warning(f"[Style Match Check] Style mismatch explanation: {explanation}")
            return (False, explanation) if return_explanation else False

        # Fallback: the model ignored the required output format — fail closed.
        logging.warning(f"[Style Match Check] Unclear result format: {style_result}")
        return (False, f"Unexpected format: {style_result}") if return_explanation else False
    except Exception as e:
        logging.error(f"[Style Match Check] Exception: {e}")
        return (False, str(e)) if return_explanation else False
def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm, return_explanation=False):
    """Validate an interview answer with a two-stage LLM evaluation.

    Stage 1 classifies the question as Exploratory or Fact-based.
    Stage 2 scores the answer (0-10):
      - Exploratory -> Plausibility and Relevance; both must be >= 8.
      - Fact-based  -> Accuracy; must be >= 8.

    Args:
        question: The interviewer question.
        answer: The respondent answer under evaluation.
        user_profile_str, fast_facts_str, interview_transcript_text:
            Context strings interpolated verbatim into the evaluator prompts.
        respondent_type: e.g. "INDIVIDUAL" or "FOCUS GROUP".
        ai_evaluator_agent: Unused here; kept for interface compatibility
            with existing callers.
        processor_llm: LLM client exposing ``.invoke(prompt)`` returning an
            object with a ``.content`` string.
        return_explanation: When True, return ``(bool, reason-or-None)``
            instead of a bare bool.

    Returns:
        bool, or ``(bool, str | None)`` when ``return_explanation`` is True.
        Unparseable ratings fail closed (False).
    """

    def _rating(line):
        """Pull the first number out of a 'Something Rating: <value>' line.

        Tolerates model output like '8', '8.5' or '8/10' (a plain float()
        call would raise on the last form); returns None and logs when no
        number is present.
        """
        match = re.search(r"\d+(?:\.\d+)?", line.split(":", 1)[1])
        if match is None:
            logging.error(f"Could not parse rating from line: {line!r}")
            return None
        return float(match.group())

    llm_mode_prompt = f"""
You are an expert in market research interview analysis. Given the following question, determine if it is:
- Exploratory: subjective, open-ended, opinion-based, or reflective (e.g., feelings, motivations, preferences, aspirations, values, beliefs, etc.)
- Fact-based: objective, factual, or directly verifiable from the respondent's profile or transcript (e.g., age, location, occupation, education, etc.)
Respondent Type: {respondent_type}
Question: {question}
Output strictly in this format:
Evaluation Mode: <Exploratory or Fact-based>
"""
    response = processor_llm.invoke(llm_mode_prompt)
    output = response.content.strip()
    evaluation_mode = "exploratory"  # default when the model ignores the format
    for line in output.split("\n"):
        if line.lower().startswith("evaluation mode:"):
            val = line.split(":", 1)[1].strip().lower()
            evaluation_mode = "factbased" if "fact" in val else "exploratory"
    logging.info(f"LLM determined evaluation mode: {evaluation_mode}")

    if evaluation_mode == "exploratory":
        eval_prompt = f"""
You are a market research evaluator. Given the following:
- User Profile: {user_profile_str}
- Fast Facts: {fast_facts_str}
- Interview Transcript: {interview_transcript_text}
- Respondent Type: {respondent_type}
- Question: {question}
- Answer: {answer}
Rate the answer on a scale of 0–10 for:
1. **Plausibility** – Does the response make sense given what is known about the respondent?
- Consider the respondent’s background, demographics, stated preferences, life stage, interests, and prior responses.
- Is the answer **internally consistent** and **realistic** for someone like this respondent?
- Does it feel like something a person in their position would genuinely say or experience?
- Avoid penalising for style — focus purely on whether the answer is believable and fits the persona.
- A low plausibility score indicates the answer seems fabricated, out of character, contradictory, or implausible for this individual or group.
2. **Relevance** – Does the answer directly and fully address the specific question asked?
- Check whether the response clearly **answers the intent of the question** without deflection or vagueness.
- Consider whether it provides a complete and meaningful response — not just a surface-level or partial reply.
- Does the answer stay **on-topic** and reflect the subject matter or framing of the original prompt?
- A low relevance score means the answer is off-topic, evasive, only loosely related, or ignores key elements of the question.
Ignore tone, emotional expression, writing style, grammar, or British/American English differences.
Focus **strictly** on the **content quality**, **truthfulness**, and **alignment with the question and user profile**.
Output strictly in this format:
Plausibility Rating: <0-10>
Relevance Rating: <0-10>
If either rating is less than 8, provide a short reason for each below:
Plausibility Reason: <reason>
Relevance Reason: <reason>
"""
        eval_text = processor_llm.invoke(eval_prompt).content.strip()
        plausibility = relevance = None
        plaus_reason = relev_reason = None
        for line in eval_text.split("\n"):
            lower = line.lower()
            if lower.startswith("plausibility rating:"):
                plausibility = _rating(line)
            elif lower.startswith("relevance rating:"):
                relevance = _rating(line)
            elif lower.startswith("plausibility reason:"):
                plaus_reason = line.split(":", 1)[1].strip()
            elif lower.startswith("relevance reason:"):
                relev_reason = line.split(":", 1)[1].strip()
        logging.info(f"Exploratory evaluation: plausibility={plausibility}, relevance={relevance}")
        if plausibility is not None and relevance is not None:
            valid = plausibility >= 8.0 and relevance >= 8.0
            if return_explanation:
                # Only sub-threshold dimensions with a stated reason are reported.
                feedback = []
                if plausibility < 8.0 and plaus_reason:
                    feedback.append(f"Plausibility: {plaus_reason}")
                if relevance < 8.0 and relev_reason:
                    feedback.append(f"Relevance: {relev_reason}")
                return valid, "; ".join(feedback) if feedback else None
            return valid
        # Fail closed when either rating could not be parsed.
        if return_explanation:
            return False, "Could not parse plausibility/relevance ratings."
        return False
    else:
        logging.info("Performing fact-based evaluation (accuracy)...")
        eval_prompt = f"""
You are a market research evaluator. Given the following:
- User Profile: {user_profile_str}
- Fast Facts: {fast_facts_str}
- Interview Transcript: {interview_transcript_text}
- Respondent Type: {respondent_type}
- Question: {question}
- Answer: {answer}
Rate the answer on a scale of 0–10 for:
1. **Accuracy** – Does the content align with the user’s facts or transcript, without fabrications?
Ignore tone, phrasing, or style. Focus only on factual correctness.
Output strictly in this format:
Accuracy Rating: <0-10>
If the rating is less than 8, provide a short reason below:
Accuracy Reason: <reason>
"""
        eval_text = processor_llm.invoke(eval_prompt).content.strip()
        accuracy = None
        accuracy_reason = None
        for line in eval_text.split("\n"):
            lower = line.lower()
            if lower.startswith("accuracy rating:"):
                accuracy = _rating(line)
            elif lower.startswith("accuracy reason:"):
                accuracy_reason = line.split(":", 1)[1].strip()
        logging.info(f"Fact-based evaluation: accuracy={accuracy}")
        if accuracy is not None:
            valid = accuracy >= 8.0
            if return_explanation:
                if not valid and accuracy_reason:
                    return False, accuracy_reason
                return valid, None
            return valid
        # Fail closed when the rating could not be parsed.
        if return_explanation:
            return False, "Could not parse accuracy rating."
        return False