import logging
import gradio as gr
import re
from RespondentAgent import *
from langchain_groq import ChatGroq
# Keywords that mark a question as factual/biographical; such questions are
# exempt from strict style enforcement (matched as lowercase substrings).
_FACTUAL_KEYWORDS = [
    "name", "age", "where are you from", "where do you live", "occupation",
    "birthplace", "what do you do", "how old", "which city", "which country"
]


def _is_factual_question(agent_question):
    """Return True when the question asks for simple biographical facts."""
    lower_q = agent_question.strip().lower()
    return any(kw in lower_q for kw in _FACTUAL_KEYWORDS)


def _voice_failure_reason(answer, processor_llm, agent_question, respondent_type):
    """Ask the LLM to validate the narrative voice of *answer*.

    Focus groups must speak collectively ("we"/"our"); individuals must speak
    in the first person ("I"/"my"). Returns a short explanation string when
    the voice check fails, or None when it passes.
    """
    if respondent_type == "FOCUS GROUP":
        pronoun_prompt = f"""
You are an expert in writing style analysis.
Determine whether the following response is appropriate for a **focus group**, which must:
- Use collective language ("we", "our", "us", "some of us", "most participants")
- Avoid any first-person singular language ("I", "me", "my", etc.)
- Speak as a group, not as an individual
Check the response below and answer in the following format:
Focus Group Style: Yes
or
Focus Group Style: No
Reason: <short reason>
---
### Question:
{agent_question}
### Response:
{answer}
"""
        response = processor_llm.invoke(pronoun_prompt)
        result = response.content.strip().lower()
        if "focus group style: no" in result:
            explanation = result.split("reason:", 1)[-1].strip().capitalize() if "reason:" in result else "The response does not follow focus group voice."
            logging.warning(f"[Style Match Check] Failed group tone: {explanation}")
            return explanation
        return None
    # INDIVIDUAL — use first-person pronoun validation
    fp_prompt = f"""
You are an expert in writing style analysis.
Determine whether the following response uses a personal **first-person** tone, appropriate for an individual.
- Look for use of "I", "me", "my", "mine", or implied personal ownership.
- Skip judgment on content quality or grammar — just the perspective.
Respond using this format:
First Person: Yes
or
First Person: No
Reason: <short explanation>
---
### Question:
{agent_question}
### Response:
{answer}
"""
    fp_response = processor_llm.invoke(fp_prompt)
    fp_result = fp_response.content.strip().lower()
    if "first person: no" in fp_result:
        explanation = fp_result.split("reason:", 1)[-1].strip().capitalize() if "reason:" in fp_result else "The answer is not in first person."
        logging.warning(f"[Style Match Check] Failed first-person test: {explanation}")
        return explanation
    return None


def _profile_style_result(answer, processor_llm, user_profile, return_explanation):
    """Compare *answer* against the user's stored communication profile.

    Returns the final verdict in the same shape the public function promises:
    a bool, or a (bool, explanation-or-None) tuple when *return_explanation*.
    """
    style = user_profile.get_field("Communication", "Style")
    tone = user_profile.get_field("Communication", "Tone")
    length = user_profile.get_field("Communication", "Length")
    topics = user_profile.get_field("Communication", "Topics")
    style_check_prompt = f"""
You are a communication coach and writing style analyst.
Evaluate how well the following response aligns with the given communication profile.
---
### Response:
{answer}
### Communication Profile:
- Style: {style}
- Tone: {tone}
- Preferred Length: {length}
- Common Topics: {topics}
---
### Instructions:
Assess whether the response reflects the user's typical communication style.
Respond with only one of:
- Style Match: Yes
- Style Match: Mostly
- Style Match: No
"""
    style_response = processor_llm.invoke(style_check_prompt)
    style_result = style_response.content.strip().lower()
    if "style match: yes" in style_result or "style match: mostly" in style_result:
        return (True, None) if return_explanation else True
    if "style match: no" in style_result:
        # A second LLM call produces the human-readable reason; it also feeds
        # the warning log, so it runs regardless of return_explanation.
        explanation_prompt = f"""
You are a communication coach.
The following response was judged as **not matching** the profile. Briefly explain why.
---
Response: {answer}
Style: {style}
Tone: {tone}
Length: {length}
Topics: {topics}
"""
        explanation_response = processor_llm.invoke(explanation_prompt)
        explanation = explanation_response.content.strip()
        logging.warning(f"[Style Match Check] Style mismatch explanation: {explanation}")
        return (False, explanation) if return_explanation else False
    # Fallback: the model answered in an unrecognised format.
    logging.warning(f"[Style Match Check] Unclear result format: {style_result}")
    return (False, f"Unexpected format: {style_result}") if return_explanation else False


def matches_user_speaking_style(answer, processor_llm, user_profile, agent_question, respondent_type="INDIVIDUAL", return_explanation=False):
    """Check whether *answer* matches the respondent's speaking style.

    Pipeline:
      1. Factual questions (name, age, ...) skip style enforcement entirely.
      2. Voice check — collective voice for "FOCUS GROUP", first-person
         otherwise (see _voice_failure_reason).
      3. Communication-profile match (see _profile_style_result).

    Args:
        answer: The candidate response text.
        processor_llm: Chat model with an ``invoke(prompt)`` method returning
            an object with a ``content`` string.
        user_profile: Object exposing ``get_field(section, field)``.
        agent_question: The interviewer question being answered.
        respondent_type: "INDIVIDUAL" (default) or "FOCUS GROUP".
        return_explanation: When True, return a (bool, explanation) tuple
            where explanation is None on success.

    Returns:
        bool, or (bool, str | None) when *return_explanation* is True. Any
        exception is caught and reported as a failure rather than raised.
    """
    logging.info("[Style Match Check] Entry")
    try:
        # --- Step 1: Skip style check for factual questions ---
        if _is_factual_question(agent_question):
            logging.info("[Style Match Check] Question is factual — skipping strict style enforcement")
            return (True, None) if return_explanation else True
        # --- Step 2: First-person or collective pronoun check ---
        logging.info(f"[Style Match Check] Performing {'collective' if respondent_type == 'FOCUS GROUP' else 'first-person'} pronoun check")
        voice_problem = _voice_failure_reason(answer, processor_llm, agent_question, respondent_type)
        if voice_problem is not None:
            return (False, voice_problem) if return_explanation else False
        # --- Step 3: Communication style match ---
        return _profile_style_result(answer, processor_llm, user_profile, return_explanation)
    except Exception as e:
        logging.error(f"[Style Match Check] Exception: {e}")
        return (False, str(e)) if return_explanation else False
def _detect_evaluation_mode(question, respondent_type, processor_llm):
    """Classify *question* as "exploratory" or "factbased" via the LLM.

    Defaults to "exploratory" when the model's output cannot be parsed.
    """
    llm_mode_prompt = f"""
You are an expert in market research interview analysis. Given the following question, determine if it is:
- Exploratory: subjective, open-ended, opinion-based, or reflective (e.g., feelings, motivations, preferences, aspirations, values, beliefs, etc.)
- Fact-based: objective, factual, or directly verifiable from the respondent's profile or transcript (e.g., age, location, occupation, education, etc.)
Respondent Type: {respondent_type}
Question: {question}
Output strictly in this format:
Evaluation Mode: <Exploratory or Fact-based>
"""
    response = processor_llm.invoke(llm_mode_prompt)
    output = response.content.strip()
    evaluation_mode = "exploratory"
    for line in output.split("\n"):
        if line.lower().startswith("evaluation mode:"):
            val = line.split(":", 1)[1].strip().lower()
            evaluation_mode = "factbased" if "fact" in val else "exploratory"
    return evaluation_mode


def _evaluate_exploratory(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, processor_llm, return_explanation):
    """Score an exploratory answer on plausibility and relevance (0-10 each).

    Valid only when both ratings parse and are >= 8.0. Returns a bool, or a
    (bool, feedback-or-None) tuple when *return_explanation*.
    """
    eval_prompt = f"""
You are a market research evaluator. Given the following:
- User Profile: {user_profile_str}
- Fast Facts: {fast_facts_str}
- Interview Transcript: {interview_transcript_text}
- Respondent Type: {respondent_type}
- Question: {question}
- Answer: {answer}
Rate the answer on a scale of 0–10 for:
1. **Plausibility** – Does the response make sense given what is known about the respondent?
- Consider the respondent’s background, demographics, stated preferences, life stage, interests, and prior responses.
- Is the answer **internally consistent** and **realistic** for someone like this respondent?
- Does it feel like something a person in their position would genuinely say or experience?
- Avoid penalising for style — focus purely on whether the answer is believable and fits the persona.
- A low plausibility score indicates the answer seems fabricated, out of character, contradictory, or implausible for this individual or group.
2. **Relevance** – Does the answer directly and fully address the specific question asked?
- Check whether the response clearly **answers the intent of the question** without deflection or vagueness.
- Consider whether it provides a complete and meaningful response — not just a surface-level or partial reply.
- Does the answer stay **on-topic** and reflect the subject matter or framing of the original prompt?
- A low relevance score means the answer is off-topic, evasive, only loosely related, or ignores key elements of the question.
Ignore tone, emotional expression, writing style, grammar, or British/American English differences.
Focus **strictly** on the **content quality**, **truthfulness**, and **alignment with the question and user profile**.
Output strictly in this format:
Plausibility Rating: <0-10>
Relevance Rating: <0-10>
If either rating is less than 8, provide a short reason for each below:
Plausibility Reason: <reason>
Relevance Reason: <reason>
"""
    eval_response = processor_llm.invoke(eval_prompt)
    eval_text = eval_response.content.strip()
    plausibility = None
    relevance = None
    plaus_reason = None
    relev_reason = None
    for line in eval_text.split("\n"):
        if line.lower().startswith("plausibility rating:"):
            try:
                plausibility = float(line.split(":", 1)[1].strip())
            except Exception as e:
                logging.error(f"Error parsing plausibility rating: {e}")
        if line.lower().startswith("relevance rating:"):
            try:
                relevance = float(line.split(":", 1)[1].strip())
            except Exception as e:
                logging.error(f"Error parsing relevance rating: {e}")
        if line.lower().startswith("plausibility reason:"):
            plaus_reason = line.split(":", 1)[1].strip()
        if line.lower().startswith("relevance reason:"):
            relev_reason = line.split(":", 1)[1].strip()
    logging.info(f"Exploratory evaluation: plausibility={plausibility}, relevance={relevance}")
    if plausibility is not None and relevance is not None:
        valid = plausibility >= 8.0 and relevance >= 8.0
        if return_explanation:
            feedback = []
            if plausibility < 8.0 and plaus_reason:
                feedback.append(f"Plausibility: {plaus_reason}")
            if relevance < 8.0 and relev_reason:
                feedback.append(f"Relevance: {relev_reason}")
            return valid, "; ".join(feedback) if feedback else None
        return valid
    if return_explanation:
        return False, "Could not parse plausibility/relevance ratings."
    return False


def _evaluate_factbased(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, processor_llm, return_explanation):
    """Score a fact-based answer on accuracy (0-10).

    Valid only when the rating parses and is >= 8.0. Returns a bool, or a
    (bool, reason-or-None) tuple when *return_explanation*.
    """
    eval_prompt = f"""
You are a market research evaluator. Given the following:
- User Profile: {user_profile_str}
- Fast Facts: {fast_facts_str}
- Interview Transcript: {interview_transcript_text}
- Respondent Type: {respondent_type}
- Question: {question}
- Answer: {answer}
Rate the answer on a scale of 0–10 for:
1. **Accuracy** – Does the content align with the user’s facts or transcript, without fabrications?
Ignore tone, phrasing, or style. Focus only on factual correctness.
Output strictly in this format:
Accuracy Rating: <0-10>
If the rating is less than 8, provide a short reason below:
Accuracy Reason: <reason>
"""
    eval_response = processor_llm.invoke(eval_prompt)
    eval_text = eval_response.content.strip()
    accuracy = None
    accuracy_reason = None
    for line in eval_text.split("\n"):
        if line.lower().startswith("accuracy rating:"):
            try:
                accuracy = float(line.split(":", 1)[1].strip())
            except Exception as e:
                logging.error(f"Error parsing accuracy rating: {e}")
        if line.lower().startswith("accuracy reason:"):
            accuracy_reason = line.split(":", 1)[1].strip()
    logging.info(f"Fact-based evaluation: accuracy={accuracy}")
    if accuracy is not None:
        valid = accuracy >= 8.0
        if return_explanation:
            if not valid and accuracy_reason:
                return False, accuracy_reason
            return valid, None
        return valid
    if return_explanation:
        return False, "Could not parse accuracy rating."
    return False


def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm, return_explanation=False):
    """Validate an interview answer against the respondent's profile.

    First classifies the question (see _detect_evaluation_mode), then scores
    the answer: exploratory questions are rated on plausibility + relevance,
    fact-based questions on accuracy. A rating of 8.0 or higher (all rated
    dimensions) counts as valid.

    Args:
        question / answer: The interview exchange to validate.
        user_profile_str, fast_facts_str, interview_transcript_text: Context
            strings interpolated into the evaluation prompts.
        respondent_type: e.g. "INDIVIDUAL" or "FOCUS GROUP".
        ai_evaluator_agent: Currently unused; kept for interface
            compatibility with existing callers.
        processor_llm: Chat model with an ``invoke(prompt)`` method returning
            an object with a ``content`` string.
        return_explanation: When True, return (bool, explanation) where the
            explanation is None for valid answers.

    Returns:
        bool, or (bool, str | None) when *return_explanation* is True.
    """
    evaluation_mode = _detect_evaluation_mode(question, respondent_type, processor_llm)
    logging.info(f"LLM determined evaluation mode: {evaluation_mode}")
    if evaluation_mode == "exploratory":
        return _evaluate_exploratory(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, processor_llm, return_explanation)
    logging.info("Performing fact-based evaluation (accuracy)...")
    return _evaluate_factbased(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, processor_llm, return_explanation)