# common/ResponseValidation.py
# (Removed Hugging Face file-viewer chrome — "raw / history / blame", file size,
#  and commit hash — that was pasted in above the module and broke the file.)
import logging
import gradio as gr
import re
from RespondentAgent import *
from langchain_groq import ChatGroq
def matches_user_speaking_style(answer, processor_llm, user_profile, agent_question, respondent_type="INDIVIDUAL", return_explanation=False):
    """Check whether *answer* fits the respondent's expected speaking style.

    Runs up to three LLM-backed checks:
      1. Skip entirely for factual questions (name, age, location, ...),
         which carry no stylistic signal.
      2. Voice check — collective voice ("we"/"us") for FOCUS GROUP,
         first-person voice ("I"/"my") for INDIVIDUAL.
      3. Match against the user's Communication profile
         (Style / Tone / Length / Topics fields).

    Args:
        answer: Candidate response text to validate.
        processor_llm: LLM client exposing ``.invoke(prompt)`` returning an
            object with a ``.content`` string (e.g. a LangChain chat model).
        user_profile: Profile object exposing ``.get_field(section, key)``.
            Only read in step 3.
        agent_question: The interviewer question the answer responds to.
        respondent_type: "INDIVIDUAL" or "FOCUS GROUP".
        return_explanation: When True, return ``(bool, explanation-or-None)``
            instead of a bare bool.

    Returns:
        bool, or ``(bool, str | None)`` when ``return_explanation`` is True.
        Any exception is caught, logged, and reported as a failed check.
    """
    logging.info("[Style Match Check] Entry")
    try:
        # --- Step 1: Skip style check for factual questions ---
        # Strict style enforcement on factual answers would only produce
        # false negatives, so short-circuit to "pass".
        factual_keywords = [
            "name", "age", "where are you from", "where do you live", "occupation",
            "birthplace", "what do you do", "how old", "which city", "which country"
        ]
        lower_q = agent_question.strip().lower()
        if any(kw in lower_q for kw in factual_keywords):
            logging.info("[Style Match Check] Question is factual — skipping strict style enforcement")
            return (True, None) if return_explanation else True

        # --- Step 2: First-person or collective pronoun check ---
        logging.info(f"[Style Match Check] Performing {'collective' if respondent_type == 'FOCUS GROUP' else 'first-person'} pronoun check")
        if respondent_type == "FOCUS GROUP":
            pronoun_prompt = f"""
You are an expert in writing style analysis.
Determine whether the following response is appropriate for a **focus group**, which must:
- Use collective language ("we", "our", "us", "some of us", "most participants")
- Avoid any first-person singular language ("I", "me", "my", etc.)
- Speak as a group, not as an individual
Check the response below and answer in the following format:
Focus Group Style: Yes
or
Focus Group Style: No
Reason: <short reason>
---
### Question:
{agent_question}
### Response:
{answer}
"""
            response = processor_llm.invoke(pronoun_prompt)
            result = response.content.strip().lower()
            if "focus group style: no" in result:
                # Surface the model's "Reason:" line when present.
                explanation = result.split("reason:", 1)[-1].strip().capitalize() if "reason:" in result else "The response does not follow focus group voice."
                logging.warning(f"[Style Match Check] Failed group tone: {explanation}")
                return (False, explanation) if return_explanation else False
        else:
            # INDIVIDUAL — use first-person pronoun validation
            fp_prompt = f"""
You are an expert in writing style analysis.
Determine whether the following response uses a personal **first-person** tone, appropriate for an individual.
- Look for use of "I", "me", "my", "mine", or implied personal ownership.
- Skip judgment on content quality or grammar — just the perspective.
Respond using this format:
First Person: Yes
or
First Person: No
Reason: <short explanation>
---
### Question:
{agent_question}
### Response:
{answer}
"""
            fp_response = processor_llm.invoke(fp_prompt)
            fp_result = fp_response.content.strip().lower()
            if "first person: no" in fp_result:
                explanation = fp_result.split("reason:", 1)[-1].strip().capitalize() if "reason:" in fp_result else "The answer is not in first person."
                logging.warning(f"[Style Match Check] Failed first-person test: {explanation}")
                return (False, explanation) if return_explanation else False

        # --- Step 3: Communication style match ---
        # Only reached when the voice check above passed (or returned "yes").
        style = user_profile.get_field("Communication", "Style")
        tone = user_profile.get_field("Communication", "Tone")
        length = user_profile.get_field("Communication", "Length")
        topics = user_profile.get_field("Communication", "Topics")
        style_check_prompt = f"""
You are a communication coach and writing style analyst.
Evaluate how well the following response aligns with the given communication profile.
---
### Response:
{answer}
### Communication Profile:
- Style: {style}
- Tone: {tone}
- Preferred Length: {length}
- Common Topics: {topics}
---
### Instructions:
Assess whether the response reflects the user's typical communication style.
Respond with only one of:
- Style Match: Yes
- Style Match: Mostly
- Style Match: No
"""
        style_response = processor_llm.invoke(style_check_prompt)
        style_result = style_response.content.strip().lower()
        if "style match: yes" in style_result or "style match: mostly" in style_result:
            return (True, None) if return_explanation else True
        if "style match: no" in style_result:
            # Second call: ask the model WHY it judged a mismatch, so the
            # caller can feed the explanation back upstream.
            explanation_prompt = f"""
You are a communication coach.
The following response was judged as **not matching** the profile. Briefly explain why.
---
Response: {answer}
Style: {style}
Tone: {tone}
Length: {length}
Topics: {topics}
"""
            explanation_response = processor_llm.invoke(explanation_prompt)
            explanation = explanation_response.content.strip()
            logging.warning(f"[Style Match Check] Style mismatch explanation: {explanation}")
            return (False, explanation) if return_explanation else False

        # Fallback: the model ignored the required output format — fail closed.
        logging.warning(f"[Style Match Check] Unclear result format: {style_result}")
        return (False, f"Unexpected format: {style_result}") if return_explanation else False
    except Exception as e:
        logging.error(f"[Style Match Check] Exception: {e}")
        return (False, str(e)) if return_explanation else False
def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm, return_explanation=False):
    """Validate an interview answer with a two-stage LLM evaluation.

    Stage 1 classifies the question as Exploratory or Fact-based.
    Stage 2 scores the answer (0-10):
      - Exploratory -> Plausibility and Relevance; both must be >= 8.
      - Fact-based  -> Accuracy; must be >= 8.

    Args:
        question: The interviewer question.
        answer: The respondent answer under evaluation.
        user_profile_str, fast_facts_str, interview_transcript_text:
            Context strings interpolated verbatim into the evaluator prompts.
        respondent_type: e.g. "INDIVIDUAL" or "FOCUS GROUP".
        ai_evaluator_agent: Unused here; kept for interface compatibility
            with existing callers.
        processor_llm: LLM client exposing ``.invoke(prompt)`` returning an
            object with a ``.content`` string.
        return_explanation: When True, return ``(bool, reason-or-None)``
            instead of a bare bool.

    Returns:
        bool, or ``(bool, str | None)`` when ``return_explanation`` is True.
        Unparseable ratings fail closed (False).
    """

    def _rating(line):
        """Pull the first number out of a 'Something Rating: <value>' line.

        Tolerates model output like '8', '8.5' or '8/10' (a plain float()
        call would raise on the last form); returns None and logs when no
        number is present.
        """
        match = re.search(r"\d+(?:\.\d+)?", line.split(":", 1)[1])
        if match is None:
            logging.error(f"Could not parse rating from line: {line!r}")
            return None
        return float(match.group())

    llm_mode_prompt = f"""
You are an expert in market research interview analysis. Given the following question, determine if it is:
- Exploratory: subjective, open-ended, opinion-based, or reflective (e.g., feelings, motivations, preferences, aspirations, values, beliefs, etc.)
- Fact-based: objective, factual, or directly verifiable from the respondent's profile or transcript (e.g., age, location, occupation, education, etc.)
Respondent Type: {respondent_type}
Question: {question}
Output strictly in this format:
Evaluation Mode: <Exploratory or Fact-based>
"""
    response = processor_llm.invoke(llm_mode_prompt)
    output = response.content.strip()
    evaluation_mode = "exploratory"  # default when the model ignores the format
    for line in output.split("\n"):
        if line.lower().startswith("evaluation mode:"):
            val = line.split(":", 1)[1].strip().lower()
            evaluation_mode = "factbased" if "fact" in val else "exploratory"
    logging.info(f"LLM determined evaluation mode: {evaluation_mode}")

    if evaluation_mode == "exploratory":
        eval_prompt = f"""
You are a market research evaluator. Given the following:
- User Profile: {user_profile_str}
- Fast Facts: {fast_facts_str}
- Interview Transcript: {interview_transcript_text}
- Respondent Type: {respondent_type}
- Question: {question}
- Answer: {answer}
Rate the answer on a scale of 0–10 for:
1. **Plausibility** – Does the response make sense given what is known about the respondent?
- Consider the respondent’s background, demographics, stated preferences, life stage, interests, and prior responses.
- Is the answer **internally consistent** and **realistic** for someone like this respondent?
- Does it feel like something a person in their position would genuinely say or experience?
- Avoid penalising for style — focus purely on whether the answer is believable and fits the persona.
- A low plausibility score indicates the answer seems fabricated, out of character, contradictory, or implausible for this individual or group.
2. **Relevance** – Does the answer directly and fully address the specific question asked?
- Check whether the response clearly **answers the intent of the question** without deflection or vagueness.
- Consider whether it provides a complete and meaningful response — not just a surface-level or partial reply.
- Does the answer stay **on-topic** and reflect the subject matter or framing of the original prompt?
- A low relevance score means the answer is off-topic, evasive, only loosely related, or ignores key elements of the question.
Ignore tone, emotional expression, writing style, grammar, or British/American English differences.
Focus **strictly** on the **content quality**, **truthfulness**, and **alignment with the question and user profile**.
Output strictly in this format:
Plausibility Rating: <0-10>
Relevance Rating: <0-10>
If either rating is less than 8, provide a short reason for each below:
Plausibility Reason: <reason>
Relevance Reason: <reason>
"""
        eval_text = processor_llm.invoke(eval_prompt).content.strip()
        plausibility = relevance = None
        plaus_reason = relev_reason = None
        for line in eval_text.split("\n"):
            lower = line.lower()
            if lower.startswith("plausibility rating:"):
                plausibility = _rating(line)
            elif lower.startswith("relevance rating:"):
                relevance = _rating(line)
            elif lower.startswith("plausibility reason:"):
                plaus_reason = line.split(":", 1)[1].strip()
            elif lower.startswith("relevance reason:"):
                relev_reason = line.split(":", 1)[1].strip()
        logging.info(f"Exploratory evaluation: plausibility={plausibility}, relevance={relevance}")
        if plausibility is not None and relevance is not None:
            valid = plausibility >= 8.0 and relevance >= 8.0
            if return_explanation:
                # Only sub-threshold dimensions with a stated reason are reported.
                feedback = []
                if plausibility < 8.0 and plaus_reason:
                    feedback.append(f"Plausibility: {plaus_reason}")
                if relevance < 8.0 and relev_reason:
                    feedback.append(f"Relevance: {relev_reason}")
                return valid, "; ".join(feedback) if feedback else None
            return valid
        # Fail closed when either rating could not be parsed.
        if return_explanation:
            return False, "Could not parse plausibility/relevance ratings."
        return False
    else:
        logging.info("Performing fact-based evaluation (accuracy)...")
        eval_prompt = f"""
You are a market research evaluator. Given the following:
- User Profile: {user_profile_str}
- Fast Facts: {fast_facts_str}
- Interview Transcript: {interview_transcript_text}
- Respondent Type: {respondent_type}
- Question: {question}
- Answer: {answer}
Rate the answer on a scale of 0–10 for:
1. **Accuracy** – Does the content align with the user’s facts or transcript, without fabrications?
Ignore tone, phrasing, or style. Focus only on factual correctness.
Output strictly in this format:
Accuracy Rating: <0-10>
If the rating is less than 8, provide a short reason below:
Accuracy Reason: <reason>
"""
        eval_text = processor_llm.invoke(eval_prompt).content.strip()
        accuracy = None
        accuracy_reason = None
        for line in eval_text.split("\n"):
            lower = line.lower()
            if lower.startswith("accuracy rating:"):
                accuracy = _rating(line)
            elif lower.startswith("accuracy reason:"):
                accuracy_reason = line.split(":", 1)[1].strip()
        logging.info(f"Fact-based evaluation: accuracy={accuracy}")
        if accuracy is not None:
            valid = accuracy >= 8.0
            if return_explanation:
                if not valid and accuracy_reason:
                    return False, accuracy_reason
                return valid, None
            return valid
        # Fail closed when the rating could not be parsed.
        if return_explanation:
            return False, "Could not parse accuracy rating."
        return False