Spaces:
Build error
Build error
Update common/ResponseValidation.py
Browse files- common/ResponseValidation.py +34 -4
common/ResponseValidation.py
CHANGED
|
@@ -138,7 +138,7 @@ Please provide a concise reason why the style does not match.
|
|
| 138 |
|
| 139 |
|
| 140 |
|
| 141 |
-
def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm):
|
| 142 |
llm_mode_prompt = f"""
|
| 143 |
You are an expert in market research interview analysis. Given the following question, determine if it is:
|
| 144 |
- Exploratory: subjective, open-ended, opinion-based, or reflective (e.g., feelings, motivations, preferences, aspirations, values, beliefs, etc.)
|
|
@@ -175,11 +175,16 @@ Please rate the answer on a scale of 0–10 for:
|
|
| 175 |
Output strictly in this format:
|
| 176 |
Plausibility Rating: <0-10>
|
| 177 |
Relevance Rating: <0-10>
|
|
|
|
|
|
|
|
|
|
| 178 |
"""
|
| 179 |
eval_response = processor_llm.invoke(eval_prompt)
|
| 180 |
eval_text = eval_response.content.strip()
|
| 181 |
plausibility = None
|
| 182 |
relevance = None
|
|
|
|
|
|
|
| 183 |
for line in eval_text.split("\n"):
|
| 184 |
if line.lower().startswith("plausibility rating:"):
|
| 185 |
try:
|
|
@@ -191,9 +196,23 @@ Relevance Rating: <0-10>
|
|
| 191 |
relevance = float(line.split(":", 1)[1].strip())
|
| 192 |
except Exception as e:
|
| 193 |
logging.error(f"Error parsing relevance rating: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
logging.info(f"Exploratory evaluation: plausibility={plausibility}, relevance={relevance}")
|
| 195 |
if plausibility is not None and relevance is not None:
|
| 196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
return False
|
| 198 |
|
| 199 |
else:
|
|
@@ -210,18 +229,29 @@ Please rate the answer on a scale of 0–10 for:
|
|
| 210 |
1. Accuracy (how well the answer matches the facts in the profile, transcript, or fast facts; penalise any unsupported or fabricated content)
|
| 211 |
Output strictly in this format:
|
| 212 |
Accuracy Rating: <0-10>
|
|
|
|
|
|
|
| 213 |
"""
|
| 214 |
eval_response = processor_llm.invoke(eval_prompt)
|
| 215 |
eval_text = eval_response.content.strip()
|
| 216 |
accuracy = None
|
|
|
|
| 217 |
for line in eval_text.split("\n"):
|
| 218 |
if line.lower().startswith("accuracy rating:"):
|
| 219 |
try:
|
| 220 |
accuracy = float(line.split(":", 1)[1].strip())
|
| 221 |
except Exception as e:
|
| 222 |
logging.error(f"Error parsing accuracy rating: {e}")
|
|
|
|
|
|
|
| 223 |
logging.info(f"Fact-based evaluation: accuracy={accuracy}")
|
| 224 |
if accuracy is not None:
|
| 225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
return False
|
| 227 |
-
|
|
|
|
| 138 |
|
| 139 |
|
| 140 |
|
| 141 |
+
def validate_response(question, answer, user_profile_str, fast_facts_str, interview_transcript_text, respondent_type, ai_evaluator_agent, processor_llm, return_explanation=False):
|
| 142 |
llm_mode_prompt = f"""
|
| 143 |
You are an expert in market research interview analysis. Given the following question, determine if it is:
|
| 144 |
- Exploratory: subjective, open-ended, opinion-based, or reflective (e.g., feelings, motivations, preferences, aspirations, values, beliefs, etc.)
|
|
|
|
| 175 |
Output strictly in this format:
|
| 176 |
Plausibility Rating: <0-10>
|
| 177 |
Relevance Rating: <0-10>
|
| 178 |
+
If either rating is less than 8, provide a short reason for each below:
|
| 179 |
+
Plausibility Reason: <reason>
|
| 180 |
+
Relevance Reason: <reason>
|
| 181 |
"""
|
| 182 |
eval_response = processor_llm.invoke(eval_prompt)
|
| 183 |
eval_text = eval_response.content.strip()
|
| 184 |
plausibility = None
|
| 185 |
relevance = None
|
| 186 |
+
plaus_reason = None
|
| 187 |
+
relev_reason = None
|
| 188 |
for line in eval_text.split("\n"):
|
| 189 |
if line.lower().startswith("plausibility rating:"):
|
| 190 |
try:
|
|
|
|
| 196 |
relevance = float(line.split(":", 1)[1].strip())
|
| 197 |
except Exception as e:
|
| 198 |
logging.error(f"Error parsing relevance rating: {e}")
|
| 199 |
+
if line.lower().startswith("plausibility reason:"):
|
| 200 |
+
plaus_reason = line.split(":", 1)[1].strip()
|
| 201 |
+
if line.lower().startswith("relevance reason:"):
|
| 202 |
+
relev_reason = line.split(":", 1)[1].strip()
|
| 203 |
logging.info(f"Exploratory evaluation: plausibility={plausibility}, relevance={relevance}")
|
| 204 |
if plausibility is not None and relevance is not None:
|
| 205 |
+
valid = plausibility >= 8.0 and relevance >= 8.0
|
| 206 |
+
if return_explanation:
|
| 207 |
+
feedback = []
|
| 208 |
+
if plausibility < 8.0 and plaus_reason:
|
| 209 |
+
feedback.append(f"Plausibility: {plaus_reason}")
|
| 210 |
+
if relevance < 8.0 and relev_reason:
|
| 211 |
+
feedback.append(f"Relevance: {relev_reason}")
|
| 212 |
+
return valid, "; ".join(feedback) if feedback else None
|
| 213 |
+
return valid
|
| 214 |
+
if return_explanation:
|
| 215 |
+
return False, "Could not parse plausibility/relevance ratings."
|
| 216 |
return False
|
| 217 |
|
| 218 |
else:
|
|
|
|
| 229 |
1. Accuracy (how well the answer matches the facts in the profile, transcript, or fast facts; penalise any unsupported or fabricated content)
|
| 230 |
Output strictly in this format:
|
| 231 |
Accuracy Rating: <0-10>
|
| 232 |
+
If the rating is less than 8, provide a short reason below:
|
| 233 |
+
Accuracy Reason: <reason>
|
| 234 |
"""
|
| 235 |
eval_response = processor_llm.invoke(eval_prompt)
|
| 236 |
eval_text = eval_response.content.strip()
|
| 237 |
accuracy = None
|
| 238 |
+
accuracy_reason = None
|
| 239 |
for line in eval_text.split("\n"):
|
| 240 |
if line.lower().startswith("accuracy rating:"):
|
| 241 |
try:
|
| 242 |
accuracy = float(line.split(":", 1)[1].strip())
|
| 243 |
except Exception as e:
|
| 244 |
logging.error(f"Error parsing accuracy rating: {e}")
|
| 245 |
+
if line.lower().startswith("accuracy reason:"):
|
| 246 |
+
accuracy_reason = line.split(":", 1)[1].strip()
|
| 247 |
logging.info(f"Fact-based evaluation: accuracy={accuracy}")
|
| 248 |
if accuracy is not None:
|
| 249 |
+
valid = accuracy >= 8.0
|
| 250 |
+
if return_explanation:
|
| 251 |
+
if not valid and accuracy_reason:
|
| 252 |
+
return False, accuracy_reason
|
| 253 |
+
return valid, None
|
| 254 |
+
return valid
|
| 255 |
+
if return_explanation:
|
| 256 |
+
return False, "Could not parse accuracy rating."
|
| 257 |
return False
|
|
|